#! /usr/bin/env python3
import xarray as xr
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import glob
import os
from joblib import Parallel, delayed

def extract_nuts(ds, nuts, crd2, out_file):
    """Write the part of ``ds`` that belongs to one NUTS region to ``out_file``.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset providing an ``IDs`` variable and an ``npts`` dimension.
    nuts
        Value compared against the ``"NUTS"`` column of ``crd2``.
    crd2 : DataFrame
        Table with ``"NUTS"`` and ``"OBJECTID"`` columns; the ``OBJECTID``
        values of the matching rows are looked up in ``ds.IDs``.
    out_file : str
        Path of the NetCDF file to create.

    Returns
    -------
    int
        ``1`` if ``out_file`` already existed (nothing is done), ``0`` after
        the file has been written.
    """
    # Report progress from the arguments only; the function must not depend
    # on a module-level ``in_file`` existing in the calling process.
    print(os.path.basename(out_file), nuts)

    if os.path.isfile(out_file):
        return 1

    sel_ids = crd2.loc[crd2["NUTS"] == nuts, "OBJECTID"].values
    sel_ids_bool = np.isin(ds.IDs, sel_ids)
    ds_sel = ds.sel(npts=sel_ids_bool).load()

    # Write to a temporary name first and rename afterwards: an interrupted
    # write must not leave a truncated ``out_file`` behind, because the
    # existence check above would then skip it on every later run.
    tmp_file = "{}.{}.tmp".format(out_file, os.getpid())
    try:
        ds_sel.to_netcdf(tmp_file)
        os.replace(tmp_file, out_file)
    finally:
        # After a successful replace the temporary file is gone already.
        if os.path.exists(tmp_file):
            os.remove(tmp_file)

    return 0

# Driver: for every input radiation file, write one NetCDF file per NUTS
# region, skipping outputs that already exist.
in_dir = '/path/to/Radiation_input/'
in_files = sorted(glob.glob(in_dir + "UnivVienna_200???-200???_15min.nc"))

# Directory that receives one output file per input file and NUTS region.
out_dir = "/path/to/output/Data_Rad/"

nuts2 = gpd.read_file("/path/to//NUTS2.shp")
crd = gpd.read_file("/path/to/Coordinates_Radiation_Dataset.shp")
# Spatial join so that every joined coordinate record carries a NUTS value.
crd2 = gpd.sjoin(crd, nuts2[["NUTS", "geometry"]])

# The set of regions does not depend on the input file; compute it once.
nuts_list = sorted(set(crd2["NUTS"]))

num_cores = 6

for in_file in in_files:
    # Build "<input stem>_<NUTS>.nc" from the file name only, so that a
    # directory name can never be altered by the region substitution.
    out_stem = os.path.splitext(os.path.basename(in_file))[0]
    pending = []
    for nuts in nuts_list:
        out_file = out_dir + out_stem + "_" + nuts + ".nc"
        if not os.path.isfile(out_file):
            pending.append((nuts, out_file))

    # Nothing left to do for this input: do not even open the dataset.
    if not pending:
        continue

    with xr.open_dataset(in_file) as ds_full:
        # ``Dataset.drop`` is deprecated; ``drop_vars`` is its replacement.
        ds = ds_full.drop_vars("RNI")
        Parallel(n_jobs=num_cores, verbose=10)(
            delayed(extract_nuts)(ds, nuts, crd2, out_file)
            for nuts, out_file in pending
        )
        
