#!/usr/bin/env python

# Copyright (C)  2008, ENPC - INRIA - EDF R&D
#     Author(s): Vivien Mallet
#
# This file is part of the air quality modeling system Polyphemus.
#
# Polyphemus is developed in the INRIA - ENPC joint project-team CLIME and in
# the ENPC - EDF R&D joint laboratory CEREA.
#
# Polyphemus is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Polyphemus is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# For more information, visit the Polyphemus web site:
#      http://cerea.enpc.fr/polyphemus/

# This program checks the correctness of observation data files.


import sys, os, optparse, time, datetime


#############
# ARGUMENTS #
#############


# Command-line interface: a few options followed by the files to be checked.
usage = ("%prog [options] [file(s) to be checked]\n\n"
         "Checks the correctness of observation data files.")
parser = optparse.OptionParser(usage = usage)

### Options.

# Time span that the files are expected to cover.
parser.add_option("-s", "--starting-date", dest = "starting_date",
                  action = "store",
                  help = "Starting date in format YYYY-MM-DD_HH."
                  " If unavailable, the first date in the observation file"
                  " is taken.")
parser.add_option("-e", "--ending-date", dest = "ending_date",
                  action = "store",
                  help = "Ending date in format YYYY-MM-DD_HH.")
# Constraints on the observed values.
parser.add_option("-p", "--positive", dest = "positive",
                  action = "store_true", default = False,
                  help = "All valid observations should be positive.")
parser.add_option("-u", "--unknown", dest = "unknown_value",
                  action = "store", default = [],
                  help = "List of unknown values."
                  " One may provide them in a list, e.g., -u \"-999 0\"."
                  " Only numbers are accepted.")
parser.add_option("-n", "--non-contiguous", dest = "non_contiguous",
                  action = "store_true", default = False,
                  help = "Accept non contiguous " "dates.")
# Note that the destination is 'exit': it is True unless "-c" is given.
parser.add_option("-c", "--continue-on-error", dest = "exit",
                  action = "store_false", default = True,
                  help = "Continue parsing even "
                  "after an error occurred. If not set, the program stops "
                  "after the first error.")

### Parses arguments.

option, args = parser.parse_args()

# At least one file is required.
if not args:
    print("You must provide at least one file.")
    print("Option -h or --help for information about usage.")
    sys.exit(1)

# Dates cannot be imposed when non-contiguous dates are accepted.
if option.non_contiguous and (option.starting_date or option.ending_date):
    print('If option "-n" is activated, options "-s" and "-e" cannot.')
    sys.exit(1)

# The unknown values are given in a single string, separated by blanks; they
# are stored as a list of strings (empty if the option was not provided).
raw_unknown_value = option.unknown_value
option.unknown_value = []
if raw_unknown_value:
    option.unknown_value = raw_unknown_value.split()


####################
# USEFUL FUNCTIONS #
####################


def parse_date(date_string, format):
    """
    Converts a string into a date.

    @type date_string: string
    @param date_string: The date to be parsed.
    @type format: string
    @param format: The format of 'date_string', as accepted by
    'time.strptime'.

    @rtype: datetime.datetime
    @return: The date built from the year, month, day, hour, minute and
    second parsed in 'date_string'.
    """
    parsed = time.strptime(date_string, format)
    # Only the first six fields (down to the second) are relevant.
    year, month, day, hour, minute, second = parsed[:6]
    return datetime.datetime(year, month, day, hour, minute, second)


def is_num(str):
    """
    Tells whether a string can be converted to a floating-point number.

    @type str: string
    @param str: String to be tested.

    @rtype: Boolean
    @return: True if 'float' accepts 'str', False if it raises a ValueError.
    """
    try:
        float(str)
    except ValueError:
        return False
    return True


### Printing facility.

class PrintInPlace:
    """
    PrintInPlace writes data on screen and is able to overwrite what it
    previously wrote.
    """

    def __init__(self, length = 0):
        """
        @type length: integer
        @param length: Number of characters to be overwritten next time
        something is printed on screen.
        """
        self.length = length

    def _overwrite(self, elt):
        """
        Erases the last 'self.length' characters and prints a string instead.

        @type elt: string
        @param elt: String to be printed on screen.

        @rtype: integer
        @return: The number of characters that were printed for 'elt'.
        """
        text = str(elt)
        # Goes back, blanks the previous output and goes back again.
        backspaces = chr(8) * self.length
        blanks = ' ' * self.length
        sys.stdout.write(backspaces + blanks + backspaces + text)
        sys.stdout.flush()
        return len(text)

    def __call__(self, elt):
        """
        Prints a string on screen, in place of the previous one.

        @type elt: string
        @param elt: String to be printed on screen.
        """
        self.length = self._overwrite(elt)

    def Print(self, elt):
        """
        Prints a string on screen, exactly like a call to the instance.

        @type elt: string
        @param elt: String to be printed on screen.
        """
        self(elt)

    def Reinit(self):
        """
        Reinits the instance: nothing will be erased next time something is
        printed on screen.
        """
        self.length = 0

    def Clear(self, elt = ''):
        """
        Erases the characters that were supposed to be overwritten, possibly
        prints a last string, and reinits the instance so that this last
        string will not be erased.

        @type elt: string
        @param elt: The last string to be printed.
        """
        self._overwrite(elt)
        self.length = 0


prt = PrintInPlace()


def error(message, stop = False):
    """
    Reports an error on screen and, if requested, stops the program.

    The in-place output of 'prt' is cleared first, then the message is
    printed with the prefix "[ERROR] ".

    @type message: string
    @param message: Description of the error.
    @type stop: Boolean
    @param stop: If True, the program exits even if the user asked to
    continue on errors.
    """
    prt.Clear()
    print("[ERROR] " + message)
    # 'option.exit' is True unless "--continue-on-error" was given.
    must_exit = stop or option.exit
    if must_exit:
        sys.exit(1)


############
# CHECKING #
############


# Further parsing of the arguments: the dates given on the command line are
# converted to datetime objects. A malformed date is reported to the user
# instead of ending in an uncaught exception.
try:
    if option.starting_date:
        option.starting_date = parse_date(option.starting_date,
                                          "%Y-%m-%d_%H")
    if option.ending_date:
        option.ending_date = parse_date(option.ending_date, "%Y-%m-%d_%H")
except ValueError:
    print("The starting and ending dates must be in format YYYY-MM-DD_HH.")
    sys.exit(1)

# Further checking of the arguments: every unknown value must be a number.
# The values are converted to floats so that they can be compared with the
# observations.
unknown_value_list = []
for x in option.unknown_value:
    if not is_num(x):
        # Only the offending item is reported ('option.unknown_value' is a
        # list and cannot be concatenated with a string).
        print("The unknown values must be number, but you gave: \""
              + x + "\".")
        sys.exit(1)
    unknown_value_list.append(float(x))
option.unknown_value = unknown_value_list

### Main loop, over the files to be checked.

for filename in args:
    prt.Clear()
    prt("Checking \"%s\"..." % filename)

    # Does the file exist? If not, the program always stops.
    if not os.path.isfile(filename):
        error("\"" + filename + "\" cannot be open.", True)

    # Reads all lines. The file is closed even if the reading fails.
    filestream = open(filename, "r")
    try:
        line_list = filestream.readlines()
    finally:
        filestream.close()

    # All lines must have two elements, except maybe the last line: a blank
    # last line is ignored.
    if line_list and line_list[-1].strip() == "":
        line_list = line_list[:-1]

    # Empty?
    if not line_list:
        # This is not allowed with a specified starting or ending date, since
        # the file is then supposed to cover a given period.
        if option.starting_date or option.ending_date:
            error("\"" + filename + "\" is empty.", True)
        continue

    # Sets the starting date.
    if option.starting_date:
        date = option.starting_date
    else:
        try:
            date = parse_date(line_list[0].split()[0], "%Y%m%d%H")
        except (ValueError, IndexError):
            # IndexError is raised if the first line is blank.
            error("The date in line 1 of \"" + filename
                  + "\" cannot be parsed:\n" + line_list[0])
            # This point is reached only if the program continues on errors.
            # Without a reference date, the file cannot be checked further.
            continue

    # Now checks all lines. 'date' is the date expected in the current line:
    # it starts one hour before the starting date because it is incremented
    # at the beginning of every iteration.
    one_hour = datetime.timedelta(0, 3600)
    line_index = 0
    date -= one_hour
    for line in line_list:
        line_index += 1
        date += one_hour
        element_list = line.split()

        if len(element_list) != 2:
            error("Line " + str(line_index) + " in \"" + filename
                  + "\" does not contain exactly two elements:\n" + line,
                  True)

        # The date.
        try:
            file_date = parse_date(element_list[0], "%Y%m%d%H")
        except ValueError:
            error("The date in line " + str(line_index)
                  + " of \"" + filename + "\" cannot be parsed:\n" + line)
            # If the program continues, there is no date to be compared with
            # the expected date.
            file_date = None
        if file_date is not None and not option.non_contiguous \
                and date != file_date:
            error("The date in line " + str(line_index)
                  + " of \"" + filename + "\" should be "
                  + date.strftime("%Y-%m-%d_%H") + " but it is "
                  + file_date.strftime("%Y-%m-%d_%H") + ":\n" + line)

        # The observation.
        if not is_num(element_list[1]):
            error("The observation in line " + str(line_index)
                  + " of \"" + filename + "\" is not a number:\n" + line)
            # If the program continues, the value cannot be checked further.
            continue
        value = float(element_list[1])
        if value in option.unknown_value:
            continue
        if option.positive and value < 0:
            error("The observation in line " + str(line_index)
                  + " of \"" + filename + "\" is not positive:\n" + line)

    # After the loop, 'date' is the date expected in the last line.
    if option.ending_date and date != option.ending_date:
        error("File \"" + filename + "\" ends with the date "
              + date.strftime("%Y-%m-%d_%H") + " instead of "
              + option.ending_date.strftime("%Y-%m-%d_%H") + ".")

prt.Clear()
