#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import glob
import os

import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import xarray as xr

# Catchments: polygon layer read from the E-HYPE sub-basin shapefile.
ehype_shp = "/path/to/Data_Ehype/EHYPE3_subbasins/EHYPE3_subbasins.shp"
cmt_gdf = gpd.read_file(ehype_shp)

# Power plants: read the CSV table, then turn every row into a point built
# from its "lon"/"lat" columns and tag the layer as EPSG:4326.
pp_db = "/path/to/Data_Power/jrc-hydro-power-plant-database.csv"
pp_df = pd.read_csv(pp_db)
plant_points = gpd.points_from_xy(pp_df["lon"], pp_df["lat"])
pp_gdf = gpd.GeoDataFrame(pp_df, geometry=plant_points).set_crs(epsg=4326)

# Filter power plants, stage 1: keep rows of type "HROR" that report both an
# average annual generation and an installed capacity.
is_hror = pp_gdf["type"] == 'HROR'
has_generation = pp_gdf["avg_annual_generation_GWh"].notna()
has_capacity = pp_gdf["installed_capacity_MW"].notna()
pp_gdf_filt = pp_gdf.loc[is_hror & has_generation & has_capacity]

# Stage 2: keep only plants whose annual generation divided by 8.76 does not
# exceed the installed capacity.
# NOTE(review): 8.76 presumably is 8760 h/year divided by 1000 MW/GW, i.e. it
# converts GWh/year into a mean output in MW -- confirm.
mean_output = pp_gdf_filt["avg_annual_generation_GWh"] / 8.76
pp_gdf_filt = pp_gdf_filt.loc[mean_output <= pp_gdf_filt["installed_capacity_MW"]]

# Get the associated catchment ID of every filtered power plant by overlaying
# the plant points on the catchment polygons (gpd.overlay defaults to
# how="intersection", so the result carries the columns of both layers).
#
# gpd.overlay does not reproject its inputs: if the catchment layer is stored
# in a different CRS than the plant points, the coordinates are compared as-is
# and the matches are wrong or empty. Bring the catchments into the plants'
# CRS first. A layer without CRS information is passed through unchanged
# because it cannot be reprojected.
catchments = cmt_gdf
if (
    cmt_gdf.crs is not None
    and pp_gdf_filt.crs is not None
    and cmt_gdf.crs != pp_gdf_filt.crs
):
    catchments = cmt_gdf.to_crs(pp_gdf_filt.crs)
pp_cmt_id = gpd.overlay(pp_gdf_filt, catchments)

# Manual override of the catchment assignment for plant "H1905".
# NOTE(review): the reason is not recorded here -- presumably the geometric
# match picks an unsuitable sub-basin for this plant; confirm.
is_h1905 = pp_cmt_id["id"] == "H1905"
pp_cmt_id.loc[is_h1905, "SUBID"] = 8309231.
pp_cmt_id.loc[is_h1905, "HAROID"] = 8301450.

#%%
# List of E-HYPE discharge files, sorted by path.
nc_pattern = "/path/to/eHYPE/rdis_day_E-HYPEcatch??-EUR-11_ICHEC-EC-EARTH_rcp??_r12i1p1_KNMI-RACMO22E_*_catch_v1.nc"
nc_files = sorted(glob.glob(nc_pattern))
if not nc_files:
    # Fail early with the pattern in the message; otherwise the script only
    # stops at pd.concat with "No objects to concatenate".
    raise FileNotFoundError(
        "No E-HYPE NetCDF files match {!r}".format(nc_pattern)
    )


#%%
def _parse_run_metadata(nc_path):
    """Return ``(model_run, rcp)`` parsed from an E-HYPE file name.

    Only the base name is inspected, so hyphens or underscores in the
    directory part of ``nc_path`` cannot shift the fields.

    ``model_run`` is the last two characters of the second "-"-separated
    field of the file name; ``rcp`` is the sixth "_"-separated field counted
    from the end.
    """
    fname = os.path.basename(nc_path)
    model_run = fname.split("-")[1][-2:]
    rcp = fname.split("_")[-6]
    return model_run, rcp


# Catchment IDs to extract; identical for every file, so computed once.
subids = pp_cmt_id["SUBID"].values

df_list = []
for ncf in nc_files:
    model_run, rcp = _parse_run_metadata(ncf)
    print(ncf)
    with xr.open_dataset(ncf) as nc:
        # Select the discharge series of the catchments hosting the plants,
        # in the same order as the rows of pp_cmt_id.
        rdis = nc.rdis.sel(id=subids)
        # NOTE(review): assumes rdis is 2-D with the catchment dimension
        # mapped to the columns, which the relabelling below relies on.
        df_tmp = rdis.to_pandas()

    # Relabel the columns so each series is identified by plant, catchment,
    # model run and scenario.
    n_cols = df_tmp.shape[1]
    df_tmp.columns = pd.MultiIndex.from_arrays(
        [pp_cmt_id["id"], pp_cmt_id["SUBID"], [model_run] * n_cols, [rcp] * n_cols],
        names=["pp_id", "cmt_id", "model_run", "rcp"],
    )

    # Move the scenario level into the row index so the files can be
    # concatenated along the rows.
    df_tmp = df_tmp.stack(-1)
    df_list.append(df_tmp)

# Concatenate all files row-wise, then move the scenario level back into the
# columns.
# NOTE(review): unstack() requires unique (row label, rcp) pairs -- confirm
# that files with different model_run values do not cover the same dates.
df_all = pd.concat(df_list, axis=0).unstack()
df_all.to_pickle("/path/to/Data_Ehype/discharge_at_ROR_powerplant_scenarios.pkl")


