#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2015, ENPC
#     Author(s): Sylvain Doré
#
# This file is part of the air quality modeling system Polyphemus.
#
# Polyphemus is developed in the INRIA project-team CLIME and in
# the ENPC - EDF R&D joint laboratory CEREA.
#
# Polyphemus is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Polyphemus is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# For more information, visit the Polyphemus web site:
#      http://cerea.enpc.fr/polyphemus/

import optparse, os, sys
from glob import glob
from os import path


#############
# ARGUMENTS #
#############

# Help text handed to optparse.OptionParser below. A backslash at the very end
# of a line inside the literal joins that line with the next one, so each
# paragraph is stored as a single long line.
usage = """%prog [NAME]

Copies the dataset 'NAME' into current directory, taking advantage of a local \
cache with hardlinks to avoid duplicating files. It is useful for testing \
purposes.

Environment:
- The environment variable 'DATA_PATH' points to the directory that contains all \
the datasets.
- The environment variable 'DATA_CACHE_PATH' points to the directory used as \
local data cache.

Argument:
- A dataset of name 'NAME' is understood as the data under the directory \
"$DATA_PATH/NAME".
- If no argument is given, then the current directory name is taken as \
the dataset name.

Effect:
1) The dataset content is put into '$DATA_CACHE_PATH' if not already there.
(The cache system is smart enough to share common data between datasets.)
2) The dataset content is then hardlinked into the current directory.

/!\\ Warning:
The local cache directory '$DATA_CACHE_PATH' has to be purged \
manually if it takes too much space.
"""

# Build the command-line parser; no custom option is declared, so optparse
# only provides its built-in help option.
parser = optparse.OptionParser(usage=usage)

# 'args' receives the positional arguments left after option parsing.
option, args = parser.parse_args()


def fail(msg):
    """Report a fatal error and terminate the script.

    Writes 'msg', followed by a hint about the help option, to the standard
    error stream (so that diagnostics do not get mixed with regular output),
    then exits with status 1.
    """
    sys.stderr.write("{0}\n".format(msg))
    sys.stderr.write(
        "Use option '-h' or '--help' for information about usage.\n")
    sys.exit(1)


def format_path(input_path):
    """Return a canonical form of 'input_path'.

    A path containing ':' is treated as a 'host:path' specification: the part
    before the first ':' is kept verbatim and the remainder is normalized
    lexically with 'normpath' (it cannot be resolved on this machine).
    Any other path is resolved with 'realpath'.
    """
    if ':' in input_path:
        # Split on the first ':' only, so that a path which itself contains
        # a colon does not break the unpacking.
        host, remote_path = input_path.split(':', 1)
        return host + ':' + path.normpath(remote_path)
    return path.realpath(input_path)

### Environment variables.

data_path = os.environ.get('DATA_PATH')
data_cache_path = os.environ.get('DATA_CACHE_PATH')
destination_path = "."

if not data_path:
    fail("The environment variable 'DATA_PATH' must point to your shared data "
         "store base directory.")
# A ':' in the path marks a 'host:path' data store (see 'format_path'), whose
# existence cannot be checked with local filesystem calls.
if ':' not in data_path and not path.isdir(data_path):
    fail("The environment variable 'DATA_PATH' is not an existing directory. "
         "It must point to your shared data store base directory. "
         "Currently:\nDATA_PATH is {0}".format(data_path))

if not data_cache_path:
    fail("The environment variable 'DATA_CACHE_PATH' must point to your local "
         "data cache base directory.")
if not path.isdir(data_cache_path):
    fail("The environment variable 'DATA_CACHE_PATH' is not an existing "
         "directory. It must point to your local data cache base directory. "
         "Currently:\n"
         "DATA_CACHE_PATH is {0}".format(data_cache_path))

# Hardlinks cannot cross filesystems. 'os.stat' is used rather than 'os.lstat'
# so that, when one of the paths is a symbolic link, the device of the
# directory it points to is compared instead of the device holding the link.
if os.stat(destination_path).st_dev != os.stat(data_cache_path).st_dev:
    fail("The local data cache at '$DATA_CACHE_PATH' and the destination path must be "
         "on the same filesystem to allow hardlinks between them. Currently:\n"
         "destination path is {0}\n"
         "DATA_CACHE_PATH is {1}".format(destination_path, data_cache_path))

destination_path = format_path(destination_path)
data_path = format_path(data_path)
data_cache_path = format_path(data_cache_path)

### Command line arguments.

if args:
    # Path separators in the requested name are flattened to '_', so the
    # dataset name is always a single directory component.
    dataset = path.normpath(args[0]).replace('/', '_')
else:
    # Name of the current directory, expressed relative to its parent.
    dataset = path.relpath(".", "..")

dataset_path = data_path + "/" + dataset + "/"

if ':' not in dataset_path and not path.exists(dataset_path):
    fail("The requested dataset \"{0}\" does not exist".format(dataset_path))


########################
# File synchronization #
########################

def mkchdir(input_path):
    """Make 'input_path' the current directory, creating it if needed.

    Missing parent directories are created as well. The new current directory
    is printed in the form "cd <path>".

    Raises OSError if the directory can neither be created nor entered.
    """
    try:
        os.makedirs(input_path)
    except OSError:
        # Creation fails when the directory already exists, possibly because
        # a concurrent run has just created it; that case is fine. Any other
        # failure leaves no directory behind and is propagated.
        if not path.isdir(input_path):
            raise
    os.chdir(input_path)
    print("cd " + os.getcwd())


def run(command):
    """Run 'command' through the shell and abort the script if it fails.

    Returns None when the command succeeds. Otherwise an error message is
    printed and the script exits with the exit code of the command, or with
    128 plus the signal number when the command was killed by a signal.
    """
    status = os.system(command)
    if status == 0:
        return

    # 'os.system' returns an encoded wait status, not the exit code itself.
    # Handing it directly to 'sys.exit' would let a status such as 256
    # (exit code 1) be truncated to 0, i.e. reported as a success.
    if os.WIFEXITED(status):
        exit_code = os.WEXITSTATUS(status)
    elif os.WIFSIGNALED(status):
        exit_code = 128 + os.WTERMSIG(status)
    else:
        exit_code = 1

    print("[ERROR] The command:\n{0}\n"
          "has failed with return status: {1}".format(command, exit_code))
    sys.exit(exit_code)


def rsync(source, extra_options=""):
    """Synchronize 'source' into the current directory with rsync.

    Every directory found directly under 'data_cache_path' is passed as a
    '--link-dest' candidate, so that files already present in the cache are
    hardlinked instead of being copied again.

    'extra_options' is appended verbatim to the base rsync options.
    The script is aborted by 'run' if rsync fails.
    """
    # '--append-verify' has been removed from rsync options because
    # rsync seems buggy when using both '--link-dest' and '--append-verify'
    # with files of the same name as source but different.
    rsync_options = "-Lav " + extra_options
    linkdest = glob(data_cache_path + "/*/")
    # NOTE(review): the rsync manual mentions an upper bound on the number of
    # '--link-dest' directories; a cache holding many datasets may hit it.
    # To be confirmed against the deployed rsync version.
    rsync_link_options = "".join(" --link-dest=" + directory
                                 for directory in linkdest)
    # 'run' returns nothing, so its result is not printed.
    run("rsync {0} {1} {2} .".format(rsync_options, rsync_link_options,
                                     source))


# Step 1: mirror the dataset into its own directory of the local cache.
# '--delete' removes cached files that are no longer part of the dataset.
print("### 1/2 Update the local cache...")
mkchdir("{0}/{1}/".format(data_cache_path, dataset))
rsync(dataset_path, "--delete")

# Step 2: populate the destination directory from the cached copy.
print("### 2/2 Use the local cache to create the requested files...")
mkchdir("{0}/".format(destination_path))
rsync("{0}/{1}/".format(data_cache_path, dataset))
