#! /usr/bin/env python3
# -*- coding: utf-8 -*-

import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import xarray as xr
import glob
import os

def read_pickle(fname):
    """Load a pickled DataFrame and tag its columns with the file's scenario.

    The column ("H141", 9500777.0) is removed when it is present. The
    scenario label is taken from the fifth "_"-separated token of the
    file's base name and appended as a third column level, so the returned
    columns are named ("pp_id", "cmt_id", "scenario").

    Parameters
    ----------
    fname : str
        Path of the pickle file to load. Its base name must contain at
        least five "_"-separated tokens.

    Returns
    -------
    pandas.DataFrame
        The loaded frame with the three-level column index described above.
    """
    print("read pickle", fname)
    frame = pd.read_pickle(fname)

    # This one column is discarded wherever it occurs; files that do not
    # contain it are left untouched.
    unwanted = ("H141", 9500777.0)
    try:
        frame = frame.drop(unwanted, axis=1)
    except KeyError:
        pass

    # Fifth token of the base name, e.g. "rcp45" or "historical".
    name_tokens = os.path.basename(fname).split("_")
    rcp = name_tokens[4]

    # Rebuild the column index from its two existing levels plus a constant
    # scenario level.
    first_level = frame.columns.get_level_values(0)
    second_level = frame.columns.get_level_values(1)
    scenario_level = [rcp] * frame.shape[1]
    frame.columns = pd.MultiIndex.from_arrays(
        [first_level, second_level, scenario_level],
        names=["pp_id", "cmt_id", "scenario"],
    )

    return frame

# Folder holding the input pickles; the combined outputs are written back
# into the same folder.
pkl_dir = "/path/to/Data_Ehype/pkl_tmp/"

# Only "historical" and "rcp45" are processed; "rcp85" is currently left out.
for scen in ("historical", "rcp45"):
    # Files whose names end in "H141.pkl" are handled as a separate group
    # from all other files of the scenario.
    pattern_stem = pkl_dir + "rdis_day_E-HYPE*" + scen
    flist_h141 = set(glob.glob(pattern_stem + "*H141.pkl"))
    flist = set(glob.glob(pattern_stem + "*.pkl"))
    flist = sorted(flist - flist_h141)

    # Stack each group row-wise and order its rows by index.
    df_comb = pd.concat(
        [read_pickle(path) for path in flist], axis=0
    ).sort_index()
    df_comb2 = pd.concat(
        [read_pickle(path) for path in sorted(flist_h141)], axis=0
    ).sort_index()

    # Put the two groups side by side and store the combined table.
    df_all = pd.concat([df_comb, df_comb2], axis=1)
    df_all.to_pickle(pkl_dir + "eHYPE_discharge_" + scen + ".pkl")

    # Average all rows that share the same first-level index value
    # (presumably one row per model and time step -- confirm against the
    # input files).
    first_level_keys = df_all.index.get_level_values(0)
    df_all_mean = df_all.groupby(first_level_keys).mean()
    df_all_mean.to_pickle(pkl_dir + "eHYPE_discharge_model-mean_" + scen + ".pkl")
    
