#------------------------------------------------------------------------------
#********* TopEros: An integrated TOPMODEL - MUSLE modelling platform**********
#------------------------------------------------------------------------------

## This is a python implementation of TOPMODEL, TopEros' LSM.
## The program implements a Monte carlo Simulation (MCS) to determine 5
## parameters of TOPMODEL: This TOPMODEL assumes (LnT0-LnTe) = 0.
## The hydrological model also works when all water in UZ leaks to saturated
## zone within a single time step
## The module for infiltration excess overland flow is omitted
## Units of length and time are consistent with units of input observation data
## This TOPMODEL is adapted from Fortran code (Keith Beven), MATLAB code
## (Hiromu Okazawa, 2015) and Python code (Okiria Emmanuel, 2018)
## Written by Okiria Emmanuel (2024) during PhD research at Gifu University

# IMPORTING NECESSARY LIBRARIES
import platform
import os
import time # time library to help keep count of code run time
start_time = time.time()
import numpy as np
import pandas as pd
import rasterio
import geopandas as gpd



#****************************SECTION 1: FUNCTIONS******************************

#### Dynamic assignment of PC_NAME based on OS
# PC_NAME is the user-account folder name that generate_file_path() inserts
# into the data path, so it has to match the account on each machine.
_os_name = platform.system().lower()
if _os_name == 'windows':
    PC_NAME = 'okiri'
    #PC_NAME='emmanuel'
elif _os_name == 'darwin':  # macOS; the literal must not contain whitespace
    PC_NAME = 'Emma'
else:
    PC_NAME = 'default'

#### Function to generate file paths
def generate_file_path(pc_name, *path_parts):
    """Build a path inside the per-user TopEros data folder.

    Args:
        pc_name: User-account folder name placed in the base directory.
        *path_parts: Sub-folder / file names appended to the base directory.

    Returns:
        The joined path string. On Windows the base directory is the user's
        Dropbox folder on the C: drive; on any other system it is the
        Dropbox folder below /Users.
    """
    on_windows = platform.system() == 'Windows'
    if on_windows:
        base_dir = f'C:\\Users\\{pc_name}\\Dropbox\\NIES\\NIES\\useful_data\\TopEros'
    else:
        base_dir = f'/Users/{pc_name}/Dropbox/NIES/NIES/useful_data/TopEros'
    return os.path.join(base_dir, *path_parts)
    
#### Function to read an .xlsx sheet into a DataFrame with date parsing
def read_xlsx_w_date(file_path, sheet_name, parse_dates):
    """Load one Excel sheet as a pandas DataFrame.

    Args:
        file_path: Location of the workbook.
        sheet_name: Sheet selector, forwarded to ``pd.read_excel``.
        parse_dates: Date-parsing specification, forwarded to
            ``pd.read_excel``.

    Returns:
        Whatever ``pd.read_excel`` returns for the given arguments.
    """
    sheet_frame = pd.read_excel(
        file_path,
        sheet_name=sheet_name,
        parse_dates=parse_dates,
    )
    return sheet_frame


#### Function to read a .csv file into a NumPy array
def read_csv(file_path):
    """Read a CSV file with pandas and return its values as a NumPy array.

    The first line of the file is consumed as the header (pandas default),
    so only the data rows appear in the result.

    Args:
        file_path: Location of the CSV file.

    Returns:
        The ``.values`` array of the parsed DataFrame.
    """
    table = pd.read_csv(file_path, low_memory=False)
    return table.values

#### Function to read out raster values into a Numpy array
def read_raster(file_path):
    """Return the first band of a raster file as a 2-D NumPy array.

    Args:
        file_path: Path of a raster that rasterio can open.

    Returns:
        Array holding the values of band 1 only.
    """
    with rasterio.open(file_path) as ds:
        # Request band 1 directly (rasterio band indexes start at 1) instead
        # of loading every band and discarding all but the first.
        return ds.read(1)
    
# Constants
# Catchment name; used as a folder name when building the input and output
# paths. Other values noted by the author: 'Atari___', 'Sironko_'.
cat_ = 'Namatala' # Atari___, Sironko_


#**********************SECTION 2: Data management******************************

# Read the input files. The read_csv helper returns NumPy arrays; calling
# pd.read_csv directly (pd. prefix) keeps a DataFrame instead.

# Folders shared by the meteorological inputs and by the other inputs
_met_dir = generate_file_path(PC_NAME, 'Dat', 'Met_dat_', '2015____', cat_)
_aux_dir = generate_file_path(PC_NAME, 'Dat', 'Others__', cat_)

# Rainfall: daily series kept as a DataFrame, hourly series as an array
rain = pd.read_csv(os.path.join(_met_dir, 'Rain_D__.csv'))
rain_hr = read_csv(os.path.join(_met_dir, 'Rain_H__.csv'))

# Observed discharge: daily, and hourly converted from [mm/day] to [mm/hr]
Qobs = read_csv(os.path.join(_met_dir, 'Qobs_D__.csv'))
Qobs_hr = read_csv(os.path.join(_met_dir, 'Qobs_H__.csv')) / 24

# Evapotranspiration: daily and hourly
ET0 = read_csv(os.path.join(_met_dir, 'ET0_D___.csv'))
ET0_hr = read_csv(os.path.join(_met_dir, 'ET0_H___.csv'))

# Topographic Index classes and their frequency distribution
TI = read_csv(os.path.join(_aux_dir, 'TI______.csv'))
freq = read_csv(os.path.join(_aux_dir, 'freq____.csv'))

# Gridded Topographic Index (first raster band)
TIM = read_raster(os.path.join(_aux_dir, 'TI______.tif'))


# Catchment polygon
catchment_shapefile = gpd.read_file(os.path.join(_aux_dir, 'CatPoly_.shp'))


# CLEAN UP RASTER VALUES: THRESHOLDING OPERATIONS
#********************** Harmonising Raster shapes******************************
# Value that marks cells to be masked out of the gridded Topographic Index
nan_value_TIM = -128

# Shape of the TI grid, kept as the target array shape of rasters
target_shape = TIM.shape

# Masked copy of the TI grid with the marker value hidden
TIM_masked = np.ma.masked_values(TIM, value=nan_value_TIM)

#******************************************************************************

#*********************SECTION 3: PRELIMINARY CALCULATIONS**********************
# RANDOM NUMBER GENERATION FOR THE PARAMETER CALIBRATION using MCS
np.random.seed(0)  # Fixed seed: every run draws the same parameter sets
MM = 10000  # Number of parameter sets generated by MCS

# One row per parameter set, 9 columns. Columns 0-4 receive the sampled
# parameters below; columns 5-8 are left at zero in this section.
RNDM = np.zeros((MM, 9))

# Sampling bounds (all changeable): a* is the minimum, b* the maximum.
## m; PARAMETER CONTROLLING THE RATE OF DECLINE OF TRANSMISSIVITY IN THE SOIL
## PROFILE: RATE OF CHANGE OF RECESSION CURVES
a0, b0 = 0, 50
## Te; THE AREAL VALUE OF T0 [L*L/T]
a1, b1 = 1000, 10000
## td; SATURATED ZONE TIME DELAY PER UNIT STORAGE DEFICIT [T/L]
a2, b2 = 0, .03
## SRZinitial; initial root zone storage deficit [L]
## (eliminated from the calibrated parameters: both bounds are 0)
a3, b3 = 0, 0
## SRmax; MAXIMUM ROOTZONE STORAGE DEFICIT [L]
a4, b4 = 0, 10

# Fill columns 0-4 in column order, MM uniform draws per column, so the
# random stream is consumed in the sequence m, Te, td, SRZinitial, SRmax.
_bounds = ((a0, b0), (a1, b1), (a2, b2), (a3, b3), (a4, b4))
for _col, (_low, _high) in enumerate(_bounds):
    RNDM[:, _col] = np.random.uniform(_low, _high, MM)

# READING OUT EXCEL DATASETS FOR TOPMODEL CALIBRATION & TopEros RUN
# "WE WANT THIS OUT OF THE "FOR" LOOP TO SAVE COMPUTATION TIME"


# ASSIGNING THE VALUE OF INITIAL SIMULATED DISCHARGE
# Initial base flow is taken from the first daily observed discharge value
Q1 = Qobs[0, 0]

# ***************NUMBER OF ROWS & COLUMNS IN SELECT ARRAYS*********************

# Dimensions of the hourly rainfall table
rows_h, cols_h = rain_hr.shape

# Finding number of cells in Area of Interest: every grid cell that does not
# carry the marker value nan_value_TIM counts as a catchment cell
rows, cols = TIM.shape
nan_mask = np.isclose(TIM, nan_value_TIM)  # default rtol=1e-05, atol=1e-08
nan_count = nan_mask.sum()
Num_of_catchment_cells = (rows * cols) - nan_count

# Fraction of the catchment represented by one cell inside its bounds
freq_1 = 1 / Num_of_catchment_cells

#******************************************************************************

# CREATION OF N-D ARRAY STRUCTURES FOR THE CALCULATION OF DISTRIBUTED VARIABLES
# NOTE(review): these templates are allocated at full size; within this
# section they are only used to report their shapes - confirm they are needed.
zero_matrix = np.zeros(TI.shape) @ np.zeros(rain.shape).T  # Matrix product
print('2-D shape is', zero_matrix.shape)
zero_matrix_2 = np.zeros((rows_h, rows, cols))  # 3-D array: [depth,r,c]
print('3-D shape is', zero_matrix_2.shape)
zero_matrix_2_1 = np.zeros((rows_h, len(TI), cols_h))
print('3-D_2 shape is', zero_matrix_2_1.shape)
zero_matrix_3 = np.zeros((rows_h, rows, cols, cols_h))  # 4-D: [batch,r,c,depth]
print('4-D shape is', zero_matrix_3.shape)

# ASSIGNING THE VALUE OF INITIAL SIMULATED DISCHARGE
# Initial hourly discharge is taken from the first hourly observed value
Q1_hr = Qobs_hr[0, 0]

# SETTING UP INDICES TO ALLOW FOR WARMUP PERIOD
start_index, end_index = 0, len(rain)

# CALCULATING THE AREAL AVERAGE OF TI
# Lambda is the frequency-weighted mean of the Topographic Index
Lambda = (TI * freq).sum() / freq.sum()
# of TI

#******************************* SECTION 4 ************************************
# TOPMODEL IMPLEMENTATION / CALIBRATION
# Each pass of the outer loop runs the hourly water balance for one sampled
# parameter set (row M of RNDM), aggregates the hourly fluxes to daily values
# and writes four goodness-of-fit scores into columns 5-8 of that row.

# TI class values as a 1-D vector. TI holds a single column; indexing the
# column explicitly avoids assigning a length-1 array to a scalar element,
# which recent NumPy versions deprecate.
TI_col=TI[:,0]
last_hr=cols_h-1 # Index of the final hourly step within one row (day)

for M in range(MM): # M takes on values 0 to (MM-1), one per parameter set

    # PREALLOCATE ARRAYS (fresh zeros for every parameter set)
    Qv=np.zeros(rain.shape) # Qv is lumped daily vertical flux from unsaturated
    # zone (UZ) to saturated zone (SZ) [L/T]: drainage flux into the water
    # table from the unsaturated zone
    Qv_hr=np.zeros(rain_hr.shape) # Lumped hourly vertical flux from UZ to SZ
    Qover=np.zeros(rain.shape) # Qover is lumped daily overland flow
    Qover_hr=np.zeros(rain_hr.shape) # Lumped hourly overland flow
    Qsub=np.zeros(rain.shape) # Qsub (base flow) is lumped saturated subsurface
    # flow [L/T]
    Qsub_hr=np.zeros(rain_hr.shape) # Lumped hourly subsurface flow
    Qsim=np.zeros(rain.shape) # Qsim is simulated discharge: Qover + Qsub [L/T]
    Qsim_hr=np.zeros(rain_hr.shape) # Hourly simulated discharge
    ETa=np.zeros(rain.shape) # ETa is actual evapotranspiration [L]
    ETa_hr=np.zeros(rain_hr.shape) # Actual hourly evapotranspiration
    SRZ=np.zeros(rain.shape) # Storage deficit in root zone [L]
    SRZ_hr=np.zeros(rain_hr.shape) # Lumped hourly storage deficit in rootzone
    Sbar=np.zeros(rain.shape) # Areal average of S [L]
    Sbar_hr=np.zeros(rain_hr.shape) # Hourly areal average of S [L]
    S_hr=np.zeros(zero_matrix_2_1.shape) # Hourly local storage deficit per
    # TI class, indexed [day, TI class, hour]
    S=np.zeros(zero_matrix.shape) # Pixel scale storage deficit until
    # saturation [L]
    SUZ=np.zeros(zero_matrix.shape) # Storage in unsaturated zone [L]
    SUZ_hr=np.zeros(zero_matrix_2_1.shape)
    EX=np.zeros(zero_matrix.shape) # Redundant or excess water amount in root
    # zone [L]
    EX_hr=np.zeros(zero_matrix_2_1.shape)
    UZ=np.zeros(zero_matrix.shape) # Vertical flux from unsaturated zone to
    # saturated zone [L]
    UZ_hr=np.zeros(zero_matrix_2_1.shape)

    # PICK THE PARAMETERS OF THIS SET FROM THE RNDM STORE
    m=RNDM[M,0]
    Te=RNDM[M,1]
    td=RNDM[M,2]
    SRZinitial=RNDM[M,3]
    SRmax=RNDM[M,4]

    # Subsurface flow at zero mean storage deficit; constant for this set, so
    # it is evaluated once instead of at every time step
    Q0=Te*np.exp(-Lambda)

    # INITIAL STATE
    # Mean storage deficit that reproduces the first observed hourly discharge
    Sbar_hr[0,0]=-m*np.log((Q1_hr)/Q0)
    # The initial root zone deficit is set once, before the time loops, so the
    # stored result of the first step is not overwritten at later steps
    SRZ_hr[0,0]=SRZinitial

    # CALCULATE FLUXES AT EACH STEP
    for t in range(rows_h): # Iterate through the days in the year
        for ha in range(cols_h): # Iterate through the hours in a day
            # Views on the current time step of the per-class state arrays;
            # writing to them updates the full arrays in place
            S_now=S_hr[t,:,ha]
            SUZ_now=SUZ_hr[t,:,ha]
            EX_now=EX_hr[t,:,ha]
            UZ_now=UZ_hr[t,:,ha]

            # CALCULATE THE LOCAL STORAGE DEFICIT OF EVERY TI CLASS
            S_now[:]=Sbar_hr[t,ha]+(m*(Lambda-TI_col))
            S_now[S_now<0]=0 # A deficit cannot be negative

            # ROOTZONE CALCULATIONS
            SRZ_hr[t,ha]=SRZ_hr[t,ha]-rain_hr[t,ha]
            if SRZ_hr[t,ha]<0.0:
                # Rain beyond the root zone deficit goes to the unsaturated
                # zone of every TI class
                SUZ_now-=SRZ_hr[t,ha]
                SRZ_hr[t,ha]=0

            # UNSATURATED ZONE CALCULATIONS
            # Storage above the local deficit becomes excess water
            over=SUZ_now>S_now
            EX_now[over]=SUZ_now[over]-S_now[over]
            SUZ_now[over]=S_now[over]
            # CALCULATE DRAINAGE FROM SUZ (only where a deficit remains)
            unsat=S_now>0.0
            flux=np.minimum(SUZ_now[unsat]/(td*S_now[unsat]),SUZ_now[unsat])
            # ^ drainage is capped at the available storage
            UZ_now[unsat]=flux
            left=SUZ_now[unsat]-flux
            left[left<0.0000001]=0 # Clear negligible residual storage
            SUZ_now[unsat]=left

            # CALCULATE ACTUAL EVAPOTRANSPIRATION
            if ET0_hr[t,ha]>0.0:
                ETa_hr[t,ha]=ET0_hr[t,ha]*(1-(SRZ_hr[t,ha]/SRmax))
                # Evapotranspiration cannot push the deficit beyond SRmax
                if ETa_hr[t,ha]>(SRmax-SRZ_hr[t,ha]):
                    ETa_hr[t,ha]=SRmax-SRZ_hr[t,ha]
                SRZ_hr[t,ha]=SRZ_hr[t,ha]+ETa_hr[t,ha]

            # CALCULATE HOURLY DISCHARGES AT CATCHMENT OUTLET
            Qsub_hr[t,ha]=Q0*np.exp((-1)*Sbar_hr[t,ha]/m)
            # Frequency-weighted sums over the TI classes
            Qover_hr[t,ha]=np.sum(freq*EX_hr[t:t+1,:,ha:ha+1])
            Qv_hr[t,ha]=np.sum(freq*UZ_hr[t:t+1,:,ha:ha+1])
            Qsim_hr[t,ha]=Qover_hr[t,ha]+Qsub_hr[t,ha]

            # UPDATE VARIABLES FOR THE NEXT HOUR TIME STEP
            if ha<last_hr:
                Sbar_hr[t,ha+1]=Sbar_hr[t,ha]-Qv_hr[t,ha]+Qsub_hr[t,ha]
                # SRZ at the beginning of the next step is the SRZ value at
                # the end of this step
                SRZ_hr[t,ha+1]=SRZ_hr[t,ha]
                # SUZ is deliberately not carried over: every step starts
                # from the zeros of the preallocated array. Author's note:
                # acceptable given the small td value - to be confirmed.

        # UPDATING VARIABLES FOR THE NEXT DAY TIME STEP
        # The first hour of the next day continues from the last hour of this
        # day (last_hr replaces the former hard-coded index 23)
        if t<(rows_h-1):
            Sbar_hr[t+1,0]=Sbar_hr[t,last_hr]-Qv_hr[t,last_hr]+Qsub_hr[t,last_hr]
            SRZ_hr[t+1,0]=SRZ_hr[t,last_hr]

        # CALCULATING DAILY FLUXES AT THE CATCHMENT OUTLET
        # Mean hourly flux multiplied by 24 gives the daily value
        Qsub[t,:]=(np.mean(Qsub_hr[t,:]))*24
        Qover[t,:]=(np.mean(Qover_hr[t,:]))*24
        Qv[t,:]=(np.mean(Qv_hr[t,:]))*24
        Qsim[t,:]=Qover[t,:]+Qsub[t,:]

    # OBJECTIVE FUNCTION CALCULATION (daily observed vs daily simulated)
    Er=np.sum(Qobs-Qsim) # Sum of residuals
    SSE=np.sum((Qobs-Qsim)**2) # Sum of squared errors
    Qm=np.mean(Qobs) # Mean of observed discharge
    Qs=np.sum(Qobs) # Total observed discharge
    SSU=np.sum((Qobs-Qm)**2) # Sum of squared deviations from the observed mean
    STD=np.sqrt(SSU/rain.size) # Standard deviation of observed discharge
    Check_NS=1-(SSE/SSU) # NS / likelihood value / model performance efficiency
    Check_RMSE=np.sqrt(SSE/(rain.size))
    Check_RSR=Check_RMSE/STD
    # PBIAS in the formulation of Gupta et al. (1999); Yapo et al. (1996) and
    # Sorooshian et al. (1993) use different formulations
    Check_PBIAS=Er*100/Qs
    RNDM[M,5]=Check_NS
    RNDM[M,6]=Check_RMSE
    RNDM[M,7]=Check_RSR
    RNDM[M,8]=Check_PBIAS

# DECISION OF OPTIMUM PARAMETERS
# Rank the parameter sets by the score in column 5, best (largest) first,
# moving whole rows so that every row stays intact.
# argsort places NaN last in ascending order, so a plain reversal would list
# NaN-scored runs at the top; treating NaN as -inf ranks them last instead.
# Without NaN scores the resulting order is the same as a plain
# argsort-and-reverse.
_ns_scores=RNDM[:,5]
w=np.argsort(np.where(np.isnan(_ns_scores),-np.inf,_ns_scores)) # Ascending
RNDM=RNDM[w[::-1]] # (::-1) reverses w, giving descending order

# SAVE THE CALIBRATED PARAMETERS TO A FOLDER
file_name = 'CalPar__.csv'
folder_path = generate_file_path(PC_NAME, 'Out', cat_)
os.makedirs(folder_path, exist_ok=True)  # Create the folder when missing
file_path = os.path.join(folder_path, file_name)
# One row per parameter set, four decimals, comma separated; the header names
# the nine columns of RNDM in order
np.savetxt(
    file_path,
    RNDM,
    fmt='%.4f',
    delimiter=',',
    header="m,Te,td,SRZinitial,SRmax,NS,RMSE,RSR,PBIAS",
)
