import csv
import numpy as np
import pandas as pd
import matplotlib.dates as mdates
import datetime

def rename_duplicates(mylist):
    """Make repeated strings in a list unique by appending a counter.

    Every member of a group of identical strings receives a 1-based
    suffix in order of appearance; strings that occur only once are
    left untouched. The list is modified in place and the same list
    object is returned.

    Args:
        mylist (list): List of strings

    Returns:
        list: The same list object, with duplicates renamed

    Examples:
        >>> rename_duplicates(['Date', 'time', 'value', 'time'])
        ['Date', 'time1', 'value', 'time2']
    """
    # Maps each original string to [index of first occurrence, count so far]
    seen = {}
    for pos, label in enumerate(mylist):
        record = seen.get(label)
        if record is None:
            seen[label] = [pos, 1]
            continue

        # On the second sighting, go back and tag the first occurrence
        if record[1] == 1:
            mylist[record[0]] += str(record[1])

        # Bump the counter and tag the current occurrence with it
        record[1] += 1
        mylist[pos] += str(record[1])

    return mylist
    
    
def infer_dtypes(csvdata):
    """Guess a dtype label for each field of the first row in csvdata.

    A field is labelled 'float64' when ``float()`` accepts it and
    'string' otherwise. Only the first row is inspected.

    Args:
        csvdata (list): List of tuples with mixed types

    Returns:
        list: List of 'float64' and 'string', one entry per field

    Examples:
        >>> infer_dtypes([('text1', '123', 'text2', '1.23')])
        ['string', 'float64', 'string', 'float64']

    """
    def _label(field):
        # Anything float() rejects, for whatever reason, counts as text
        try:
            float(field)
        except Exception:
            return 'string'
        return 'float64'

    first_row = csvdata[0]
    return [_label(field) for field in first_row]


def make_dataset(csvdata, names, header='', units='', notes='',
                 strcols = ['time' , 'date' , 'hhmmss' , 'Geometry' ,
                 'State', 'averaging']):
    """Create Pandas dataframe from csvdata and return
    dictionary with dataframe, header information, units and notes.

    Column dtypes are inferred from the first data row, duplicate column
    names are made unique, blank cells become NaN, and the 'date' column
    is parsed to datetimes and used as the dataframe index.

    Args:
        csvdata (list): List of tuples containing LI-6800 data
        names (list): List of strings with same length as tuples in csvdata.
            The list itself is not modified.
        header (obj): Header information (any type) to be saved in dictionary
        units (list): List of strings representing units in data file
        notes (str): String with notes.
        strcols (list): Substrings identifying string columns; any column
            whose name contains one of them is cast to 'string'. The
            default list is only read, never modified.

    Returns:
        dict: Dictionary with dataframe as 'df', header, units and notes.
        'units' maps the unique column names to the entries of ``units``
        by position.

    Raises:
        KeyError: If the data has no column named 'date'.
    """
    # Infer datatypes from the first data row
    datatypes = infer_dtypes(csvdata)
    # Rename duplicate column names; work on a copy because
    # rename_duplicates edits its argument in place
    unique_names = rename_duplicates(list(names))
    # Create df
    df = pd.DataFrame(csvdata, columns=unique_names)
    # Map every column to its inferred dtype
    columntypes = dict(zip(unique_names, datatypes))
    # astype() raises KeyError for keys that are not columns, so only
    # force the integer type when an 'obs' column actually exists
    if 'obs' in columntypes:
        columntypes['obs'] = 'int32'
    # Overwriting dtypes with str as specified in strcols
    for key in unique_names:
        if any(ele in key for ele in strcols):
            columntypes[key] = 'string'
    # Replace missing values (empty or whitespace-only cells) by nan
    df1 = df.replace(r'^\s*$', np.nan, regex=True)
    df1 = df1.astype(columntypes)
    # Convert date column and use it as the index
    df1['date'] = pd.to_datetime(df1['date'], format='%Y%m%d %H:%M:%S')
    df1.index = df1["date"]
    return {'df': df1, 'header': header,
            'units': dict(zip(unique_names, units)),
            'notes': notes}

 
def read_li(fname, strcols = ['date' , 'hhmmss' , 'Geometry' ,
                 'State', 'averaging']):
    """Read LI-6800 text file and return dictionary with pandas dataframes,
    units, header information and notes.

    The file is read as tab-separated text. Two-column rows before the
    first '[Data]' line are stored as header entries. After each '[Data]'
    line, the second following line supplies the column names and the
    third the units; later rows with as many fields as there are column
    names are data rows. Other multi-field rows whose first field parses
    as HH:MM:SS are stored as notes, timestamped with the date of the most
    recent data row.

    Header entries are only collected before the first '[Data]' line and
    are cleared once a dataset has been emitted, so later datasets carry
    an empty header.

    Args:
        fname (str): String with path to file
        strcols (list of strings): List identifying columns with dtype(str).
            Applied to every dataset in the file.

    Returns:
        list: list of dictionaries, one for each dataset in LI-6800 data file.
        A trailing block without any data rows is skipped.

    Raises:
        ValueError: If a column-name row has no 'date' entry, or a data
            row's date does not match '%Y%m%d %H:%M:%S'.

    Examples:
        >>> data = read_li("../data/2021-05-10vpdtest")
    """

    time_format = "%H:%M:%S"
    datetime_format = "%Y%m%d %H:%M:%S"

    csvdata = []
    outdata = []
    counter = 0     # 0 while in the header; lines since '[Data]' afterwards
    names = ''
    units = ''
    notes = []
    header = {}
    # Rows that are neither data nor parseable notes. Collected for
    # inspection while debugging; not part of the return value.
    undefined = []
    idate = None    # position of the 'date' column in the current block
    date1 = None    # date of the most recent data row, used to stamp notes

    # newline='' lets the csv module do its own line-ending handling;
    # the context manager guarantees the file is closed.
    with open(fname, 'rt', newline='') as infile:
        reader = csv.reader(infile, delimiter='\t')
        for row in reader:
            if counter == 0:
                if len(row) == 2:
                    header[row[0]] = row[1]
            # Checking for start of new data block
            if row == ['[Data]']:
                print(' ')
                print('NEW data set')
                print('length previous = '+str(len(csvdata)))
                # Emit the previous block before starting a new one.
                # NOTE(review): a previous block with exactly one data row
                # is dropped here, while a final one-row block is kept --
                # confirm whether '> 1' is intended.
                if len(csvdata)>1:
                    li_data = make_dataset(csvdata, names, header=header,
                                           units=units, notes=notes,
                                           strcols=strcols)
                    outdata.append(li_data)
                    units = ''
                    names = ''
                    notes = []
                    header = {}
                # Starting new data series
                csvdata = []
                counter = 1
            if len(row)>1:
                if counter == 3:
                    names = row
                    idate = names.index('date') # position with date
                if counter == 4:
                    units = row
                if counter > 4:
                    if len(row) == len(names):
                        csvdata.append(tuple(row))
                        date1 = datetime.datetime.strptime(
                            row[idate], datetime_format).date()
                    else:
                        # Treat the row as a note if its first item is a
                        # time of day and a data-row date is available
                        try:
                            timenote = datetime.datetime.strptime(
                                row[0], time_format)
                        except ValueError:
                            undefined.append(row)
                        else:
                            if date1 is None:
                                undefined.append(row)
                            else:
                                ts_note = datetime.datetime.combine(
                                    date1, timenote.time())
                                notes.append((ts_note, row))
            if counter > 0:
                counter = counter + 1

    # Emit the last block, unless it contains no data rows at all
    if csvdata:
        li_data = make_dataset(csvdata, names, header=header, units=units,
                               notes=notes, strcols=strcols)
        outdata.append(li_data)
    return outdata
