#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 16 11:51:27 2021

@author: david
"""
import datetime as dt
import glob
import logging
import os

import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import xarray as xr
from joblib import Parallel, delayed

logging.basicConfig(filename='ehype_readout.log', level=logging.DEBUG)

def extract_subbasins(ncf, pp_cmt_id):
    """Extract the ``rdis`` series of selected sub-basins from one netCDF file.

    The ``rdis`` variable of *ncf* is subset to the ids listed in
    ``pp_cmt_id["SUBID"]``, converted to a pandas object, given a
    (``pp_id``, ``cmt_id``, ``model_run``) column MultiIndex, stacked on the
    ``model_run`` level and pickled into ``../Data_Ehype/pkl_tmp/``.

    Parameters
    ----------
    ncf : str
        Path of the netCDF file to read.  The model-run tag is taken from the
        file name: the last two characters of its second ``-``-separated
        field.
    pp_cmt_id : table with ``id`` and ``SUBID`` columns
        One row per power plant; ``SUBID`` selects the sub-basins along the
        ``id`` coordinate of ``rdis``.

    Returns
    -------
    int
        Always ``0``.

    Raises
    ------
    IndexError
        If the file name contains no ``-`` separator.
    """
    file_name = os.path.basename(ncf)
    # Parse the tag from the file name only: a "-" anywhere in a parent
    # directory would otherwise shift the field index and give a wrong tag.
    cmt = file_name.split("-")[1][-2:]
    # Swap only the real extension; str.replace would also rewrite any other
    # ".nc" occurrence inside the name.  The "_H141" suffix is fixed here.
    pkl_out_name = (
        '../Data_Ehype/pkl_tmp/' + os.path.splitext(file_name)[0] + '_H141.pkl'
    )

    with xr.open_dataset(ncf) as nc:
        print("start", ncf)
        rdis = nc.rdis.sel(id=pp_cmt_id["SUBID"].values)
        # The values are converted to pandas while the file is still open, so
        # the dataset can be closed before the reshaping and pickling below.
        # assumes rdis is 2-D with `id` as the second dimension, so that the
        # sub-basins end up as columns -- TODO confirm
        df_tmp = rdis.to_pandas()

    # Columns follow the order of the SUBID values passed to .sel(), so the
    # plant/catchment labels can be attached positionally.
    df_tmp.columns = pd.MultiIndex.from_arrays(
        [pp_cmt_id["id"], pp_cmt_id["SUBID"], [cmt] * df_tmp.shape[1]],
        names=["pp_id", "cmt_id", "model_run"],
    )
    df_tmp = df_tmp.stack(-1)
    df_tmp.to_pickle(pkl_out_name)

    print("end", ncf)
    # NOTE(review): when this runs inside a joblib worker process the
    # module-level logging.basicConfig may not have been executed there --
    # confirm that this message actually reaches the log file.
    logging.info("{0:%Y-%m-%d %H:%M:%S}: {1:s} read".format(dt.datetime.now(), ncf))
    return 0


# Sub-basin layer read from the E-HYPE shapefile
ehype_shp = "../Data_Ehype/EHYPE3_subbasins/EHYPE3_subbasins.shp"
cmt_gdf = gpd.read_file(ehype_shp)

# Power plants: read the CSV, build point geometries from the lon/lat columns
# and declare them as EPSG:4326
pp_db = "../Data_Power/jrc-hydro-power-plant-database.csv"
pp_df = pd.read_csv(pp_db)
pp_points = gpd.points_from_xy(pp_df.lon, pp_df.lat)
pp_gdf = gpd.GeoDataFrame(pp_df, geometry=pp_points)
pp_gdf = pp_gdf.set_crs(epsg=4326)


# Overlay the plant points with the sub-basin layer to get the associated
# catchment IDs
pp_cmt_id = gpd.overlay(pp_gdf, cmt_gdf)

# Hard-coded catchment ids for individual plants, applied in this order
id_overrides = (
    ("H1905", "SUBID", 8309231.),
    ("H1905", "HAROID", 8301450.),
    ("H141", "SUBID", 9534611.),
    ("H141", "HAROID", 9534269.),
)
for plant_id, id_column, id_value in id_overrides:
    pp_cmt_id.loc[pp_cmt_id["id"] == plant_id, id_column] = id_value

# Keep only the rows of plant H141
is_h141 = pp_cmt_id["id"] == "H141"
pp_cmt_id = pp_cmt_id[is_h141]


#%%
# eHYPE netCDF files matching the pattern, in sorted order
nc_pattern = "/path/to/eHYPE/rdis_day_E-HYPEcatch??-EUR-11_ICHEC-EC-EARTH_*_r12i1p1_KNMI-RACMO22E_*_catch_v1.nc"
nc_files = glob.glob(nc_pattern)
nc_files.sort()
#%%

# Number of joblib workers
num_cores = 48

# One (file, plant table) argument pair per extract_subbasins call
parallel_input = [(nc_path, pp_cmt_id) for nc_path in nc_files]

run_job = delayed(extract_subbasins)
Parallel(n_jobs=num_cores)(run_job(nc_path, table) for nc_path, table in parallel_input)
        

