#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import xarray as xr
import glob
import numpy as np

def estimate_power_per_discharge(df_in, pp_data, n_iter=3, scaling_df=None):
    """Turn discharge time series into capacity-limited power time series.

    Every column of ``df_in`` is multiplied by a per-plant factor chosen so
    that its time-mean equals the plant's mean power (annual generation
    spread evenly over 8760 hours) and is then capped at the installed
    capacity.  Capping lowers the mean again, so the rescale/cap pair is
    repeated ``n_iter`` times.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Discharge values, one column per plant.  The column index must
        expose a level named ``"pp_id"`` whose labels are present in
        ``pp_data.index``.
    pp_data : pandas.DataFrame
        Plant table providing the columns ``"avg_annual_generation_GWh"``
        and ``"installed_capacity_MW"``.
    n_iter : int, optional
        Number of rescale/cap rounds.
    scaling_df : pandas.DataFrame, optional
        Factors returned by an earlier call (rows: plant ids, columns:
        ``0 .. n_iter-1``).  When given, the factors are applied as-is
        instead of being re-estimated from ``df_in``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        The power time series (same shape as ``df_in``) and the table of
        scaling factors used in each round.
    """
    hours_per_year = 8760

    pp_id = df_in.columns.get_level_values("pp_id")
    # GWh per year -> mean power in MW.
    avg_power_MW = pp_data.loc[pp_id, "avg_annual_generation_GWh"] / hours_per_year * 1000
    max_capacity = pp_data.loc[pp_id, "installed_capacity_MW"]

    # Work on a copy so the caller's frame is returned untouched even when
    # n_iter is 0.
    ts_tmp = df_in.copy(deep=True)

    calc_scaling = scaling_df is None
    if calc_scaling:
        # Explicit float dtype: an empty frame is object-typed by default,
        # and filling it through ``.loc`` may keep that dtype, which would
        # then propagate into the scaled time series.
        scaling_df = pd.DataFrame(index=pp_id, columns=range(n_iter), dtype=float)

    for ii in range(n_iter):
        if calc_scaling:
            # Factor that brings the current mean back to the target mean.
            scaling_df.loc[:, ii] = avg_power_MW / ts_tmp.mean()

        scaling = scaling_df.loc[:, ii]

        ts_tmp = ts_tmp * scaling
        # Cap at installed capacity.
        # NOTE(review): NaN samples compare False here and are therefore
        # replaced by the capacity value -- confirm this is intended.
        ts_tmp = ts_tmp.where(ts_tmp <= max_capacity, max_capacity, axis=1)

    return ts_tmp, scaling_df
    
def analytic_powercurve(discharge, power):
    """Fit a straight line ``P = k * Q + d`` to the non-saturated samples.

    Samples sitting exactly at the minimum or maximum of ``power`` are
    excluded, so only the part of the series strictly between the two
    plateaus contributes to the least-squares fit.

    Parameters
    ----------
    discharge : pandas.Series
        Discharge values, indexed like ``power``.
    power : pandas.Series
        Power values belonging to ``discharge``.

    Returns
    -------
    tuple
        ``(k, d, P_min, P_max)``: slope and intercept of the fitted line and
        the minimum / maximum of ``power``.  ``k`` and ``d`` are NaN when
        fewer than two samples lie strictly between ``P_min`` and ``P_max``
        (e.g. constant power), because no line can be fitted then.
    """
    P_max = power.max()
    P_min = power.min()

    # Build the selection mask once and reuse it for both series.
    between_plateaus = (power > P_min) & (power < P_max)
    P = power[between_plateaus].values
    Q = discharge[between_plateaus].values

    if P.size < 2:
        # Nothing to fit: report the plateaus only instead of failing
        # inside np.polyfit.
        return np.nan, np.nan, P_min, P_max

    # The sample order does not matter mathematically for a least-squares
    # fit; the sort is kept (computed once) so results stay numerically
    # identical to earlier runs.
    order = np.argsort(P)
    k, d = np.polyfit(Q[order], P[order], 1)

    return k, d, P_min, P_max

def apply_analytic_powercurve(Q, k, d, P_min, P_max):
    """Evaluate the linear power curve ``Q * k + d`` and clamp the result.

    Parameters
    ----------
    Q : array-like supporting arithmetic and boolean-mask assignment
        Discharge values (e.g. a NumPy array or pandas Series).
    k, d : float
        Slope and intercept of the power curve.
    P_min, P_max : float
        Lower and upper bound applied to the computed power.

    Returns
    -------
    Same container type as ``Q * k + d``
        Power values limited to ``[P_min, P_max]``.  ``Q`` itself is not
        modified, since the arithmetic produces a new object.
    """
    clamped = Q * k + d

    # Raise everything below the lower plateau first ...
    too_low = clamped < P_min
    clamped[too_low] = P_min

    # ... then cut off what is still above the upper plateau.
    too_high = clamped > P_max
    clamped[too_high] = P_max

    return clamped
    
    
    

# ---------------------------------------------------------------------------
# Power-plant table: read the spreadsheet and attach point geometries built
# from the lon/lat columns (EPSG:4326).
# ---------------------------------------------------------------------------
pp_db = "../Data_Power/jrc-hydro-power-plant-database_geometry_est_ann_power.xlsx"
pp_df = pd.read_excel(pp_db)
pp_gdf = gpd.GeoDataFrame(
    pp_df,
    geometry=gpd.points_from_xy(pp_df.lon, pp_df.lat),
)
pp_gdf = pp_gdf.set_crs(epsg=4326)


# ---------------------------------------------------------------------------
# Plant selection: type 'HROR' with both annual generation and installed
# capacity known.
# ---------------------------------------------------------------------------
is_hror = pp_gdf["type"] == 'HROR'
has_generation = pp_gdf["avg_annual_generation_GWh"].notnull()
has_capacity = pp_gdf["installed_capacity_MW"].notnull()
pp_gdf_filt = pp_gdf[is_hror & has_generation & has_capacity]

# Drop plants whose mean power (GWh per year / 8.76 = MW) would exceed the
# installed capacity.
mean_power_MW = pp_gdf_filt["avg_annual_generation_GWh"] / 8.76
pp_gdf_filt = pp_gdf_filt[mean_power_MW <= pp_gdf_filt["installed_capacity_MW"]]
pp_gdf_filt = pp_gdf_filt.set_index("id")

# ---------------------------------------------------------------------------
# Discharge: Q_ref is only used for its two-level column index, which maps
# the labels found in Q_all (level 1, compared as floats) to the labels used
# as plant ids below (level 0).
# ---------------------------------------------------------------------------
Q_ref = pd.read_pickle("/path/to/Data_Ehype/eHYPE_discharge_model-mean_historical.pkl")
ref_columns = Q_ref.columns

Q_all = pd.read_pickle("/path/to/Data_Ehype/discharge_all_ERA5.pkl")
Q_all.columns = Q_all.columns.astype(float)
Q_all = Q_all.loc[:, ref_columns.get_level_values(1).values]
Q_all.columns = ref_columns.get_level_values(0)

# Keep only the selected plants, in the order of the filtered plant table.
Q_all = Q_all.loc[:, pp_gdf_filt.index]
Q_all.columns.name = "pp_id"
Q_all.index.name = "time"

# ---------------------------------------------------------------------------
# Power estimate (10 rescale/cap rounds) and one linear power curve per plant.
# ---------------------------------------------------------------------------
daily_ror0, scaling = estimate_power_per_discharge(Q_all, pp_gdf_filt, n_iter=10)

apc_columns = ['k', 'd', 'P_min', 'P_max']
df_apc = pd.DataFrame(index=daily_ror0.columns, columns=apc_columns)
for pp in daily_ror0.columns:
    # One row per plant: (k, d, P_min, P_max).
    df_apc.loc[pp, :] = analytic_powercurve(Q_all[pp], daily_ror0[pp])



#%%
# Tag every power column with the data-source label before writing.
daily_ror0.columns = pd.MultiIndex.from_product([daily_ror0.columns, ["ERA5"]])

daily_ror0.to_pickle("/path/to/Data_Ehype/daily_power_ror_era5.pkl")

df_apc.to_pickle("/path/to/Data_Ehype/daily_ror_all_apc_era5.pkl")


