Source code for lib.specific_ipsl

"""
##############################################################################################

  Project   : CRESCENDO/AEROCOM
  Filename  : liborganize.py
  Author    : Ramiro Checa-Garcia
  email     : rcheca@lsce.ipsl.fr
  Purpose   : Reorganize variables for LMDzINCAOR experiment runs.

  Revision History ----------------------------------------------------------

  Date       Author     Ref    Revision

  2018-Apr   R.Checa           First code
  2018-Jul   R.Checa           Implemented as module
  2018-Nov   R.Checa           Final first version.


  TODO LIST:

  1. Diagnostics in lev_var should not be hard-coded  (Improvement)
  2. Add reference values for key diagnostics        (Improvement)
  3. Add description to each function                (Documentation)
  4. Factors in new_var could be variables in netCDF  (Improvement)

##############################################################################################
"""

import xarray as xr
import numpy  as np
import glob
from  netCDF4  import Dataset
import os
import json
from pprint import pprint
import shutil
import yaml
import pandas as pd
import lib.check as check
import gc 

def memory_usage_psutil():
    # return the system memory usage (psutil namedtuple, values in bytes)
    import psutil
    return psutil.virtual_memory()
def myprint(mystr, finfo=None):
    print(mystr)
    if finfo is not None:
        finfo.write(mystr + '\n')
    return
def post_processing(mysettings, directory, list_variables, lyears, post_t,
                    study, expID, freq, finfo=None):
    '''
    This function does the post-processing of the extracted files.

    Note that it is not the best function from the programming point of view,
    as the results depend strongly on the files found in the directory.
    - It would be important to add a test that the files in the directories
      are the files expected from the settings file.
    '''
    gc.collect()  # to ensure that we are not accumulating too much garbage.

    ### Add a new config file where the names of the files to check are added,
    ### then we have to make a kind of loop over all files.
    with open('etc/check_cmip6_variables.yaml', 'r') as f_checkings:
        cmip6check = yaml.safe_load(f_checkings)

    # We define here the columns of the info-checks file ------------
    dfcolumns = ['variable', 'method', 'value', 'units', 'testing-file']
    vars_checks = []
    l_non_processed = []
    l_yes_processed = []
    l_uni_processed = []
    l_yes_checked = []

    for idirec, direc in enumerate(list_variables):
        str1 = ('\n ... Post-Processing of files related with ' + direc)
        myprint(str1, finfo=finfo)
        lfiles = sorted(glob.glob(directory + direc + '/' + direc + '*.nc'))
        print(lfiles)
        ## We select only those files that begin with the variable name, as they
        ## were generated in the pre-processing step. We also ensure that we are
        ## not including aggregations of several files, so we drop files with the
        ## '-accum'/'_accum' string in the name (filtering a copy instead of
        ## deleting while iterating).
        lfiles = [item for item in lfiles
                  if '-accum' not in item and '_accum' not in item]
        for item in lfiles:
            if len(item) != len(lfiles[0]):
                str1 = ('\n -- Problem: not all filenames have the same length: unusual')
                myprint(str1, finfo=finfo)
                if mysettings['safety']['check_files'] == True:
                    exit()
        if len(lfiles) == 1:
            first_yr = lfiles[0].split('_')[-2]
            last_yr  = lfiles[0].split('_')[-1].split('.')[0]
            stringyr = lyears[0]
            new_file_t = lfiles[0].replace(stringyr + '0101', first_yr)
            new_file   = new_file_t.replace('_' + stringyr + '1231', '-' + last_yr + '-accum')
            varid = new_file.split('/')[-1].split('_')[0]
            #print('check this', new_file, varid)
        if len(lfiles) > 1:
            first_yr  = lfiles[1].split('_')[-2]
            last_yr   = lfiles[-1].split('_')[-1].split('.')[0]
            str_files = ' '.join(lfiles[1::])
            stringyr  = lyears[0]
            new_file_t = lfiles[0].replace(stringyr + '0101', first_yr)
            new_file   = new_file_t.replace('_' + stringyr + '1231', '-' + last_yr + '-accum')
            #print(new_file)
            varid = new_file.split('/')[-1].split('_')[0]

        # Concatenation of all files ------------------------------------------
        if post_t == 'all':
            commandnco = 'ncrcat -O ' + str_files + ' ' + new_file
            os.system(commandnco)
            varid = new_file.split('/')[-1].split('_')[0]
            if varid != direc:
                varid = direc
            good_name, info_var = create_filename(mysettings, study, expID, varid,
                                                  new_file, freq=freq, finfo=finfo)
            if good_name != False:
                save_name = directory + direc + '/' + good_name
                unit_stat, finalname = add_global_attributes(mysettings, new_file,
                                                             save_name, study, info_var)
                l_yes_processed.append(direc)
                l_uni_processed.append(unit_stat)
                if varid in cmip6check.keys():
                    l_yes_checked.append(varid)
                    for mymethod in cmip6check[varid]['method'].keys():
                        if mymethod == 'tendency_mass':
                            a1, a2, a3 = check.tendency_mass(finalname, varid,
                                                             'etc/area_grid.nc', 'area',
                                                             finfo=finfo)
                            vars_checks.append({'variable': a1, 'method': mymethod,
                                                'value': a3, 'units': a2,
                                                'testing-file': finalname.split('/')[-1]})
                        if mymethod == 'global_mean':
                            a1, a2, a3, a4 = check.global_mean(finalname, varid,
                                                               cmip6check[varid], finfo=finfo)
                            vars_checks.append({'variable': a1, 'method': mymethod,
                                                'value': a3, 'units': a2,
                                                'testing-file': finalname.split('/')[-1]})
                        if mymethod == 'global_mean_atlev':
                            for level in cmip6check[varid]['method'][mymethod]['levels']:
                                a1, a2, a3, a4 = check.global_mean_atlev(finalname, varid,
                                                                         cmip6check[varid],
                                                                         level=level, finfo=finfo)
                                vars_checks.append({'variable': a1,
                                                    'method': 'glo mean at' + a4,
                                                    'value': a3, 'units': a2,
                                                    'testing-file': finalname.split('/')[-1]})
                        if mymethod == 'total_load':
                            a1, a2, a3 = check.total_load(finalname, varid, cmip6check[varid],
                                                          area_fname='data/area_grid.nc',
                                                          area_varid='area', finfo=finfo)
                            vars_checks.append({'variable': a1, 'method': mymethod,
                                                'value': a3, 'units': a2,
                                                'testing-file': finalname.split('/')[-1]})
            else:
                l_non_processed.append(direc)

        if post_t == 'year' or 'AEROCOM' in expID:
            for fname in lfiles:
                good_name, info_var = create_filename(mysettings, study, expID, varid,
                                                      fname.replace('1_20', '1-20'),
                                                      freq=freq, finfo=finfo)
                if good_name != False:
                    save_name = directory + direc + '/' + good_name
                    unit_stat, finalname = add_global_attributes(mysettings, fname,
                                                                 save_name, study, info_var)
                    l_yes_processed.append(direc)
                    l_uni_processed.append(unit_stat)
                    if varid in cmip6check.keys():
                        l_yes_checked.append(varid)
                        for mymethod in cmip6check[varid]['method'].keys():
                            if mymethod == 'tendency_mass':
                                a1, a2, a3 = check.tendency_mass(finalname, varid,
                                                                 'etc/area_grid.nc', 'area',
                                                                 finfo=finfo)
                                vars_checks.append({'variable': a1, 'method': mymethod,
                                                    'value': a3, 'units': a2,
                                                    'testing-file': finalname.split('/')[-1]})
                            if mymethod == 'global_mean':
                                a1, a2, a3, a4 = check.global_mean(finalname, varid,
                                                                   cmip6check[varid], finfo=finfo)
                                vars_checks.append({'variable': a1, 'method': mymethod,
                                                    'value': a3, 'units': a2,
                                                    'testing-file': finalname.split('/')[-1]})
                            if mymethod == 'global_mean_atlev':
                                for level in cmip6check[varid]['method'][mymethod]['levels']:
                                    a1, a2, a3, a4 = check.global_mean_atlev(finalname, varid,
                                                                             cmip6check[varid],
                                                                             level=level, finfo=finfo)
                                    vars_checks.append({'variable': a1,
                                                        'method': 'glo mean at' + a4,
                                                        'value': a3, 'units': a2,
                                                        'testing-file': finalname.split('/')[-1]})
                else:
                    l_non_processed.append(direc)

    non_cmip6_units = [y for i, y in enumerate(l_yes_processed) if l_uni_processed[i] == False]
    yes_cmip6_units = [y for i, y in enumerate(l_yes_processed) if l_uni_processed[i] == True]
    str1 = ('\n\n Processed variables ...... [' + str(len(l_yes_processed)) + '] ' +
            str(l_yes_processed) + '\n')
    str2 = ('    without CMIP6 units .... ' + str(non_cmip6_units) + '\n')
    str3 = ('    with    CMIP6 units .... ' + str(yes_cmip6_units) + '\n')
    with open('vars_cf_cmip6.txt', 'w') as fcmip6:
        for y in yes_cmip6_units:
            fcmip6.write(y + '\n')
    str4 = (' Non processed variables ... [' + str(len(l_non_processed)) + '] ' +
            str(l_non_processed) + '\n')
    myprint(str1 + str2 + str3 + str4, finfo=finfo)
    myprint('\n Checked variables ... [' + str(len(l_yes_checked)) + '] ' +
            str(l_yes_checked) + '\n', finfo=finfo)
    l_non_checked = set(l_yes_processed) - set(l_yes_checked)
    myprint(' ... processed non checked ... [' + str(len(l_non_checked)) + '] ' +
            str(l_non_checked) + '\n', finfo=finfo)

    pd.set_option('display.width', 140)
    if freq == 'hr':
        previ_process_file = mysettings[study]['process']['hrs']
    else:
        previ_process_file = mysettings[study]['process'][freq]
    cmip6_process_file    = previ_process_file.replace('.process', '_cmip6.process')
    noncmip6_process_file = previ_process_file.replace('.process', '_noncmip6.process')
    df = pd.read_table(previ_process_file, skiprows=0, header=0, delim_whitespace=True)
    cmip6_df = df[df['varname-out'].isin(yes_cmip6_units)]
    cmip6_df.to_csv('tempf', sep=' ', index=False)
    os.system('column -t tempf > ' + cmip6_process_file)
    non_df = df[df['varname-out'].isin(non_cmip6_units + l_non_processed)]
    non_df.to_csv('tempf', sep=' ', index=False)
    os.system('column -t tempf > ' + noncmip6_process_file)
    for varid in l_non_checked:
        vars_checks.append({'variable': varid, 'method': 'not-set-up', 'value': -9.99,
                            'units': '', 'testing-file': ''})
    test_info = pd.DataFrame(vars_checks, columns=dfcolumns)
    os.remove('tempf')
    return test_info
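
# Usage sketch (hypothetical paths, study and experiment IDs; the real values
# come from the settings YAML). This concatenates the per-year extractions,
# renames them to CMIP6/AeroCom conventions, runs the configured checks and
# returns a DataFrame summarizing them:
#
#   test_info = post_processing(mysettings, '/scratch/out/mystudy/monthly/',
#                               ['od550aer'], ['2000', '2001'], 'all',
#                               'mystudy', 'CRESCWP3PD-amip', 'mon',
#                               finfo=open('post.log', 'w'))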
def find_tableID(varID, freq='mon', origin='ATM'):
    table_out = False
    ltable = []
    cmip6_tables = 'etc/cmip6-cmor-tables-master/Tables/'
    # Scan every table file for the variable name, quoted either way.
    for quote in ("'", '"'):
        varIDwith = quote + varID + quote
        for fname in os.listdir(cmip6_tables):      # change directory as needed
            if os.path.isfile(cmip6_tables + fname):    # it's a file, not a directory entry
                with open(cmip6_tables + fname) as f:   # open file
                    for line in f:                      # process line by line
                        if varIDwith in line:           # search for the quoted string
                            ltable.append(cmip6_tables + fname)
                            break
    if len(ltable) == 1:
        table_out = ltable[0]
    else:
        # Disambiguate: first by the frequency in the table name, then by
        # preferring the AER tables over the other atmosphere tables.
        l2table = []
        for table_name in ltable:
            if freq in table_name:
                l2table.append(table_name)
        if len(l2table) == 1:
            table_out = l2table[0]
        else:
            for table_name in l2table:
                if 'CMIP6_AER' in table_name:
                    table_out = table_name
            if table_out == False:
                for table_name in l2table:
                    if 'CMIP6_A' in table_name:
                        table_out = table_name
    return table_out
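
# Usage sketch (hypothetical variable name): find_tableID('od550aer', freq='mon')
# returns the path of the single JSON table defining the variable, e.g.
# 'etc/cmip6-cmor-tables-master/Tables/CMIP6_AERmon.json', or False when it is
# not found. Ties are broken first by matching freq in the table name, then by
# preferring CMIP6_AER* over the other CMIP6_A* tables.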
def create_filename(mysettings, study, expID, varID, varfile, freq='mon', finfo=''):
    """
    Filename will have the structure:
        varID_tableID_sourceID_experimentID_memberID_gridlabel_timerange

    varID --------> is the variable name: varname
    tableID ------> depends on the variable: AERday, Amon, etc.
    sourceID -----> IPSL-LMDZORINCAv6
    experimentID -> CRESCWP3PD-amip
    memberID -----> v5-r1i1p1f1 (v5 refers to our internal version)
    gridlabel ----> gr (refers to the original grid, but we provide output at
                    pressure levels)
    timerange ----> 20000101-20121231 for all cases.
    """
    table_name = find_tableID(varID, freq=freq)
    if table_name == False:
        print(' ---> variable: ', varID, ' not found in CMIP6-cmor-tables\n')
        return False, False
    tableID      = table_name.split('/')[-1].split('.')[0].split('CMIP6')[1][1::]
    sourceID     = mysettings[study]['expCVs']['sourceID']
    experimentID = mysettings[study]['expID']
    memberID     = mysettings[study]['expCVs']['memberID']
    gridlabel    = mysettings[study]['expCVs']['gridlabel']
    timerange    = varfile.split('_')[-1]
    if mysettings[study]['expKIND'].upper() in ['CMIP6', 'CRESCENDO', 'JASPERKOK']:
        # for CMIP6 or CRESCENDO
        save_name = '_'.join([varID, tableID, sourceID, experimentID,
                              memberID, gridlabel, timerange])
    if 'aerocom' in mysettings[study]['expKIND'].lower():
        # for aerocom-phaseIII:
        # aerocom3_<ModelName>_<ExperimentName>_<VariableName>_<VerticalCoordinateType>
        # _<Period>_<Frequency>.nc
        # expID is AP3-CTRL2016-PD or AP3-CTRL2016-PI or AP3-remotesensing etc..
        save_name = ('aerocom3_' + sourceID + '_' + mysettings[study]['expID'] + '_' +
                     varID + '_' + timerange.replace('.nc', '') + '_' + freq + '.nc')
    with open(table_name, 'r') as cmip6tb:
        tbdata = json.load(cmip6tb)
    info_var = tbdata['variable_entry'][varID]
    myprint(' ... final filename will be: ' + save_name, finfo=finfo)
    return save_name, info_var
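
# Worked example of the CMIP6-style name assembled above (hypothetical
# variable; CV values taken from the docstring): with varID='od550aer',
# tableID='AERmon', sourceID='IPSL-LMDZORINCAv6',
# experimentID='CRESCWP3PD-amip', memberID='v5-r1i1p1f1', gridlabel='gr',
# timerange='20000101-20121231.nc', save_name becomes:
#   od550aer_AERmon_IPSL-LMDZORINCAv6_CRESCWP3PD-amip_v5-r1i1p1f1_gr_20000101-20121231.nc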
def add_global_attributes(mysettings, ncname, newname, study, info_var, finfo=None):
    # autoclose is not implemented in the old xarray version on IRENE
    dataset = xr.open_dataset(ncname)
    # Global attributes -----------------------------------------------------
    dataset.attrs['activity_id']    = mysettings[study]['expCVs']['activityID']
    dataset.attrs['institution_id'] = 'IPSL-LSCE'
    dataset.attrs['source_id']      = 'IPSL-LMDZORINCAv6'
    dataset.attrs['source_type']    = 'AGCM-AER'
    dataset.attrs['realm']          = 'aerosols'
    dataset.attrs['experiment_id']  = mysettings[study]['expID']
    dataset.attrs['packagedby']     = 'pyIPSLpack (contact: rcheca@lsce.ipsl.fr)'
    dataset.attrs['contact']        = mysettings[study]['expCVs']['contactID']
    dataset.attrs['spinning-up']    = mysettings[study]['expCVs']['spin-up']
    # Variable attributes from the CMOR table --------------------------------
    for field in ['long_name', 'standard_name', 'modeling_realm', 'frequency',
                  'dimensions', 'comment']:
        dataset[info_var['out_name']].attrs[field] = info_var[field]
    if dataset[info_var['out_name']].attrs['units'] != info_var['units']:
        # Here it would be possible to create a file with units equivalences...
        lipsl = ['-', '-',          'kg/kg', 'kg/m2',  'kg/m2/s',    'kg/(s*m2)',
                 'kg/m3',  '-', 'm^-1', 'W/m2',  'kg/kg',   'm/s']
        l_cf  = ['%', 'mol mol-1', '1',     'kg m-2', 'kg m-2 s-1', 'kg m-2 s-1',
                 'kg m-3', '1', 'm-1',  'W m-2', 'kg kg-1', 'm s-1']
        for valipsl, valcf in zip(lipsl, l_cf):
            if (dataset[info_var['out_name']].attrs['units'] == valipsl and
                    info_var['units'] == valcf):
                dataset[info_var['out_name']].attrs['units'] = info_var['units']
    if dataset[info_var['out_name']].attrs['units'] != info_var['units']:
        str1 = ('     non CMIP6 units ' + dataset[info_var['out_name']].attrs['units'] +
                ',' + info_var['units'])
        str2 = ('     keeping non CMIP6')
        myprint(str1 + '\n' + str2 + '\n', finfo=finfo)
        unit_status = False
    else:
        str1 = ('     ... Units consistent with CF')
        myprint(str1, finfo=finfo)
        unit_status = True
    # Remove dangling time-bounds references ----------------------------------
    ltest = ['times_bnds', 'time_bnds', 'time_instant_bounds']
    for checkbnd in ltest:
        if 'bounds' in dataset['time'].attrs.keys():
            if dataset['time'].attrs['bounds'] == checkbnd:
                if checkbnd not in dataset.data_vars.keys():
                    del dataset['time'].attrs['bounds']
                    #print('removed?', dataset['time'].attrs)
    # AeroCom filenames carry an explicit 2D/3D tag ---------------------------
    if 'AEROCOM' in newname.upper():
        if 'lat' in dataset.coords and 'lon' in dataset.coords and 'pres' in dataset.coords:
            finalname = newname.replace(info_var['out_name'] + '_',
                                        info_var['out_name'] + '_3D_')
        elif 'lat' in dataset.coords and 'lon' in dataset.coords:
            finalname = newname.replace(info_var['out_name'] + '_',
                                        info_var['out_name'] + '_2D_')
        else:
            finalname = newname
    else:
        finalname = newname
    if os.path.isfile(finalname):
        print('File ', finalname, ' is there')
        print(os.access(finalname, os.W_OK))
    if 'airmass' in finalname or 'ec550' in finalname:
        print('')
    else:
        dataset.to_netcdf(finalname, mode='w')
    dataset.close()
    #else:
    #    shutil.copy2(ncname, finalname)
    return unit_status, finalname
def directory_structure(study, mysettings, clean=False, create=False, finfo=None):
    import shutil
    from os.path import join as pjoin
    cwd  = mysettings['outdir']['path']
    subf = mysettings['outdir']['subfolder']
    filename1 = pjoin(cwd, subf, study, 'monthly/')
    filename2 = pjoin(cwd, subf, study, 'daily/')
    filename3 = pjoin(cwd, subf, study, 'hourly/')
    if clean == True:
        for filename in [filename1, filename2, filename3]:
            myprint('   Cleaning of: ' + filename, finfo=finfo)
            try:
                listf = os.listdir(filename)
                if mysettings['safety']['overwrite'] == False and len(listf) > 0:
                    myprint('   Safety settings stop the program: out-tree is not empty: \n\n',
                            finfo=finfo)
                    myprint(str(listf), finfo=finfo)
                    myprint('-------\n' * 2, finfo=finfo)
                else:
                    shutil.rmtree(filename)
            except OSError:
                myprint('   ... asked to clean an empty directory ...', finfo=finfo)
    if create == True:
        for filename in [filename1, filename2, filename3]:
            try:
                os.makedirs(filename)
            except OSError:
                pass
    return filename1, filename2, filename3
def _get_var_checkdir(outname, freqsource, dirMO, dirDA, dirHF):
    if freqsource == 'DA':
        try:
            os.stat(dirDA + outname)
        except OSError:
            os.mkdir(dirDA + outname)
    if freqsource == 'MO':
        try:
            os.stat(dirMO + outname)
        except OSError:
            os.mkdir(dirMO + outname)
    if freqsource == 'HF':
        try:
            os.stat(dirHF + outname)
        except OSError:
            os.mkdir(dirHF + outname)
    return True


def _get_var_checkfiles(outname, yr, study, freqsource, module, timeout, exp,
                        extra, storepath, dirMO, dirDA, dirHF):
    #dic_struc = {'ATM': {'MO': ('_1M_histmth.nc', '_mon'), 'DA': ('_1M_histmth.nc', '_mon'),
    #                     'HF': ('_HF_histh1f.nc', '_1hr')}
    f_tail = '_IPSL_' + study + '_' + yr + '0101_' + yr + '1231' + '.nc'
    bpath = storepath + exp
    if module == 'ATM':
        basedir = bpath + '/ATM/Output/' + freqsource + '/' + exp + '_' + yr + '0101_' + yr + '1231'
        if freqsource == 'MO':
            input_f = basedir + '_1M_histmth.nc'
            outfile = dirMO + outname + '/' + outname + '_mon' + f_tail
        if freqsource == 'DA':
            input_f = basedir + '_1D_histday.nc'
            outfile = dirDA + outname + '/' + outname + '_day' + f_tail
        if freqsource == 'HF':
            if timeout == '1h':
                input_f = basedir + '_HF_histh1f.nc'
                outfile = dirHF + outname + '/' + outname + '_1hr' + f_tail
            if timeout == '6h':
                input_f = basedir + '_HF_histh6f.nc'
                outfile = dirHF + outname + '/' + outname + '_6hr' + f_tail
            if timeout == '3h':
                input_f = basedir + '_3H_remotesens.nc'
                outfile = dirHF + outname + '/' + outname + '_3hr' + f_tail
    if module == 'CHM':
        if freqsource == 'MO':
            input_f = bpath + '/CHM/Output/MO/' + exp + '_' + yr + '0101_' + yr + '1231' + '_1M_inca_' + extra
            outfile = dirMO + outname + '/' + outname + '_mon' + f_tail
        if freqsource == 'DA':
            input_f = bpath + '/CHM/Output/DA/' + exp + '_' + yr + '0101_' + yr + '1231' + '_1D_inca_' + extra
            outfile = dirDA + outname + '/' + outname + '_day' + f_tail
        if freqsource == 'HF':
            input_f = bpath + '/CHM/Output/DA/' + exp + '_' + yr + '0101_' + yr + '1231' + '_1D_inca_' + extra
            if timeout == '1h':
                outfile = dirHF + outname + '/' + outname + '_1h' + f_tail
            if timeout == '6h':
                outfile = dirHF + outname + '/' + outname + '_6h' + f_tail
    if module == 'SRF':
        input_f = bpath + '/SRF/Output/MO/' + exp + '_' + yr + '0101_' + yr + '1231' + '_1M_sechiba_history.nc'
        if freqsource == 'MO':
            outfile = dirMO + outname + '/' + outname + '_mon' + f_tail
    return input_f, outfile
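
# Path sketch (hypothetical experiment name and year; the pattern is the one
# assembled above): for module='ATM', freqsource='MO', exp='EXP00', yr='2000',
#   input_f = storepath + 'EXP00/ATM/Output/MO/EXP00_20000101_20001231_1M_histmth.nc'
#   outfile = dirMO + outname + '/' + outname + '_mon_IPSL_<study>_20000101_20001231.nc'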
def solve_time(outfile, outfilen, varname='', out=''):
    # Rename the XIOS time axis to CF names: dimension and variable
    # time_counter -> time (a leading '.' in ncrename makes the rename
    # optional, so files lacking the object are left untouched).
    commandnco = ('ncrename -O -d time_counter,time -v time_counter,time ' +
                  outfile + ' ' + outfilen + out)
    os.system(commandnco)
    commandnco = 'ncrename -O -v .time_counter_bnds,time_bnds ' + outfile + ' ' + outfilen + out
    os.system(commandnco)
    # Rename the model pressure axis presnivs -> pres and tag it as air_pressure.
    commandnco = 'ncrename -O -d .presnivs,pres -v .presnivs,pres ' + outfile + ' ' + outfilen + out
    os.system(commandnco)
    commandnco = 'ncatted -O -a standard_name,pres,o,c,air_pressure ' + outfile + ' ' + outfilen + out
    os.system(commandnco)
    # Point the time bounds attribute at the renamed bounds variable.
    commandnco = 'ncatted -O -a bounds,time,o,c,time_bnds ' + outfile + ' ' + outfilen + out
    os.system(commandnco)
    # Drop the auxiliary XIOS time variables.
    commandnco = 'ncks -O -x -v time_centered ' + outfile + ' ' + outfilen + out
    os.system(commandnco)
    commandnco = 'ncks -O -x -v time_instant ' + outfile + ' ' + outfilen + out
    os.system(commandnco)
    if varname != '':
        # Delete the stale 'coordinates' attribute of the extracted variable.
        commandnco = 'ncatted -O -a coordinates,' + varname + ',d,, ' + outfile + ' ' + outfilen + out
        os.system(commandnco)
        #print(commandnco, varname)
    return
def get_var(mysettings, year, varname, freqsource, module, timeout, outname,
            expcase, study, extra='', index=1, finfo=None):
    """
    - module     = ATM, CHM, etc.
    - varname    = vmro3, etc.
    - freqsource = DA, HF, MO for ATM; for CHM everything is DA
    """
    dirMO, dirDA, dirHF = directory_structure(study, mysettings, finfo=finfo)
    cwd  = mysettings['outdir']['path']
    subf = mysettings['outdir']['subfolder']
    storepath = mysettings['storepath']
    status = _get_var_checkdir(outname, freqsource, dirMO, dirDA, dirHF)
    input_f, outfile = _get_var_checkfiles(outname, year, study, freqsource, module,
                                           timeout, expcase, extra, storepath,
                                           dirMO, dirDA, dirHF)
    str1 = ('     Extracting (' + str(index).rjust(4) + '): ' + varname.ljust(20) +
            ' and save as ' + outname.ljust(20) + ' .... for year ' + year +
            ' ... [' + freqsource + ' ' + module + ']')
    myprint(str1, finfo=finfo)
    commandcdo = ('cdo --silent selvar,' + varname + ' ' + input_f + ' ' + outfile +
                  ' &> delfiles/del' + varname + freqsource + module)
    #print(commandcdo)
    os.system(commandcdo)
    solve_time(outfile, outfile, varname=varname,
               out=' &>> delfiles/del' + varname + freqsource + module)
    if outname != varname:
        commandcdo = ('cdo --silent -O chname,' + varname + ',' + outname + ' ' +
                      outfile + ' ' + outfile + 'x')
        #print(commandcdo)
        os.system(commandcdo)
        os.rename(outfile + 'x', outfile)
    return
def check_units(varname_list, vars_dataset):
    lunits = []
    for varn in varname_list:
        lunits.append(vars_dataset[varn].attrs['units'])
    return lunits
def scale_variable(vardataset, d_scale_vars, outname, varname, outfile, finfo=None):
    var_initial = d_scale_vars[outname]['var_initial']
    if 'scale_factor' in d_scale_vars[outname].keys():
        varscaled = vardataset[var_initial] * d_scale_vars[outname]['scale_factor']
    else:
        varscaled = vardataset[var_initial]
    # Check if a global unit factor and new units are indicated
    if 'new_units' in d_scale_vars[outname].keys():
        units_factor = d_scale_vars[outname]['new_units']['units_factor']
        units_name   = d_scale_vars[outname]['new_units']['units_name']
        myprint('     We will change units from ' + vardataset[varname].attrs['units'] +
                ' to ' + units_name, finfo=finfo)
        myprint('     We use a unit conversion factor of ' + str(units_factor), finfo=finfo)
    else:
        units_factor = 1.0
        units_name = vardataset[var_initial].attrs['units']  # keep the source units
    varfinal = varscaled * units_factor
    varfinal.name = outname
    varfinal.attrs['units'] = units_name
    varfinal.to_netcdf(outfile, unlimited_dims='time_counter')
    varfinal.close()
    return
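
# Minimal sketch of an etc/scale_variables.yaml entry consumed above (variable
# names and factors are hypothetical; the keys are the ones read by the code).
# The output name maps to a source variable, an optional scale_factor, and an
# optional new_units block:
#
#   emidust:
#     var_initial: flx_dust
#     scale_factor: 1.0
#     new_units:
#       units_factor: 1.0e-3
#       units_name: 'kg m-2 s-1'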
def new_variable_altsur(vardataset, d_new_vars, outname, varname, outfile, finfo=None):
    varname_sur = d_new_vars[outname]['var_surface']
    varname_alt = d_new_vars[outname]['var_altitud']
    factors_sur = [str(y) for y in d_new_vars[outname]['fac_surface']]
    factors_alt = [str(y) for y in d_new_vars[outname]['fac_altitud']]
    lunits = check_units(varname_sur + varname_alt, vardataset)
    if len(set(lunits)) == 1:
        myprint('     internal consistency in units is ... ok ... ' + lunits[0], finfo=finfo)
    else:
        myprint(str(lunits) + str(varname_sur) + str(varname_alt), finfo=finfo)
        exit()
    # Aggregate values over vertical coordinates
    for ivar, var_alt in enumerate(varname_alt):
        if ivar == 0:
            var_sum_alt = vardataset[var_alt].sum(dim='presnivs')
        else:
            var_sum_alt = (var_sum_alt + vardataset[var_alt].sum(dim='presnivs') *
                           d_new_vars[outname]['fac_altitud'][ivar])
    # Aggregate values at the surface
    for ivar, var_sur in enumerate(varname_sur):
        if 'presnivs' in vardataset[var_sur].dims:
            myprint('     selecting the pressure ... ' + str(vardataset['presnivs'].values[0]) +
                    ' ' + str(vardataset['presnivs'].attrs['units']), finfo=finfo)
            if ivar == 0:
                var_sum_sur = vardataset[var_sur].isel(presnivs=0)
            else:
                var_sum_sur = (var_sum_sur + vardataset[var_sur].isel(presnivs=0) *
                               d_new_vars[outname]['fac_surface'][ivar])
        else:
            if ivar == 0:
                var_sum_sur = vardataset[var_sur]
            else:
                var_sum_sur = (var_sum_sur + vardataset[var_sur] *
                               d_new_vars[outname]['fac_surface'][ivar])
    # Check if a global unit factor and new units are indicated
    if 'new_units' in d_new_vars[outname].keys():
        units_factor = d_new_vars[outname]['new_units']['units_factor']
        units_name   = d_new_vars[outname]['new_units']['units_name']
        myprint('     We will change units from ' + vardataset[varname_sur[0]].attrs['units'] +
                ' to ' + units_name, finfo=finfo)
        myprint('     We use a unit conversion factor of ' + str(units_factor), finfo=finfo)
    else:
        units_factor = 1.0
        try:
            units_name = vardataset[varname_sur[0]].attrs['units']
        except IndexError:
            units_name = vardataset[varname_alt[0]].attrs['units']
    # Aggregate all together:
    if len(varname_sur) >= 1 and len(varname_alt) >= 1:
        varfinal = (var_sum_alt + var_sum_sur) * units_factor
    if len(varname_alt) == 0:
        varfinal = var_sum_sur * units_factor
    if len(varname_sur) == 0:
        varfinal = var_sum_alt * units_factor
    varfinal.name = outname
    varfinal.attrs['units'] = units_name
    varfinal.to_netcdf(outfile, unlimited_dims='time_counter')
    varfinal.close()
    return
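
# Minimal sketch of an etc/new_variables.yaml entry consumed above (variable
# names and factors are hypothetical; the keys mirror those read by the code).
# The output is the sum of the surface terms (taken at the first pressure
# level when 3-D) plus the vertically summed altitude terms, with terms after
# the first weighted by their factors, optionally rescaled by new_units:
#
#   emiso4:
#     var_surface: [flx_so4_sur]
#     fac_surface: [1.0]
#     var_altitud: [flx_so4_alt]
#     fac_altitud: [1.0]
#     new_units:
#       units_factor: 1.0
#       units_name: 'kg m-2 s-1'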
def new_var(mysettings, year, varname, freqsource, module, timeout, outname,
            expcase, study, extra='', index=1, finfo=None):
    """
    - module     = ATM, CHM, etc.
    - varname    = vmro3, etc.
    - freqsource = DA, HF, MO for ATM; for CHM everything is DA
    """
    dirMO, dirDA, dirHF = directory_structure(study, mysettings)
    cwd  = mysettings['outdir']['path']
    subf = mysettings['outdir']['subfolder']
    storepath = mysettings['storepath']
    status = _get_var_checkdir(outname, freqsource, dirMO, dirDA, dirHF)
    input_f, outfile = _get_var_checkfiles(outname, year, study, freqsource, module,
                                           timeout, expcase, extra, storepath,
                                           dirMO, dirDA, dirHF)
    # we assume that all variables are in the same file.
    vardataset = xr.open_dataset(input_f, chunks={'time_counter': 10})
    with open("etc/new_variables.yaml", 'r') as f_new_vars:
        d_new_vars = yaml.safe_load(f_new_vars)
    with open("etc/scale_variables.yaml", 'r') as f_scale_vars:
        d_scale_vars = yaml.safe_load(f_scale_vars)
    if outname in d_scale_vars.keys():
        str1 = ('     Scaling (' + str(index).rjust(4) + '): ' + varname.ljust(20) +
                ' and save as ' + outname.ljust(20) + ' .... for year ' + year +
                ' ... [' + freqsource + ' ' + module + ']\n')
        myprint(str1, finfo=finfo)
        scale_variable(vardataset, d_scale_vars, outname, varname, outfile, finfo)
    if outname in d_new_vars.keys():
        varname_sur = d_new_vars[outname]['var_surface']
        varname_alt = d_new_vars[outname]['var_altitud']
        factors_sur = [str(y) for y in d_new_vars[outname]['fac_surface']]
        factors_alt = [str(y) for y in d_new_vars[outname]['fac_altitud']]
        str1 = ('     Creating (' + str(index).rjust(4) + '): ' + varname.ljust(20) +
                ' and save as ' + outname.ljust(20) + ' .... for year ' + year +
                ' ... [' + freqsource + ' ' + module + ']\n')
        str2 = ('     using files ... ' + ', '.join(varname_sur) +
                '\n     ... with factors ' + ', '.join(factors_sur) +
                '\n     and files ... ' + ', '.join(varname_alt) +
                '\n     ... with factors ' + ', '.join(factors_alt))
        myprint(str1 + str2, finfo=finfo)
        new_variable_altsur(vardataset, d_new_vars, outname, varname, outfile, finfo)
    solve_time(outfile, outfile, varname=varname,
               out=' &>> delfiles/del' + varname + freqsource + module)
    return
def lev_var(mysettings, year, varname, freqsource, module, timeout, outname,
            expcase, study, extra='', lev='top', index=1, finfo=None):
    """
    - module     = ATM, CHM, etc.
    - varname    = vmro3, etc.
    - freqsource = DA, HF, MO for ATM; for CHM everything is DA
    """
    dirMO, dirDA, dirHF = directory_structure(study, mysettings, finfo=finfo)
    cwd  = mysettings['outdir']['path']
    subf = mysettings['outdir']['subfolder']
    storepath = mysettings['storepath']
    status = _get_var_checkdir(outname, freqsource, dirMO, dirDA, dirHF)
    input_f, outfile = _get_var_checkfiles(outname, year, study, freqsource, module,
                                           timeout, expcase, extra, storepath,
                                           dirMO, dirDA, dirHF)
    # we assume that all variables are in the same file.
    vardataset = xr.open_dataset(input_f)
    if outname in ['rlut']:
        varname_lev = ['rlu']
    if outname in ['rlutcs']:
        varname_lev = ['rlucs']
    str1 = ('     Creating (' + str(index).rjust(4) + '): ' + varname.ljust(20) +
            ' and save as ' + outname.ljust(20) + ' .... for year ' + year +
            ' ... [' + freqsource + ' ' + module + ']')
    str2 = ('     using files ... ' + ','.join(varname_lev))
    myprint(str1 + '\n' + str2, finfo=finfo)
    if lev == 'top':
        presval = -1
    elif lev == 'surf':
        presval = 0
    else:
        print('Problem in lev_var')
        exit()
    var_lev = vardataset[varname_lev[0]].isel(presnivs=presval)
    myprint('     selecting the pressure ... ' + str(vardataset['presnivs'].values[presval]),
            finfo=finfo)
    var_lev.name = outname
    var_lev.to_netcdf(outfile, unlimited_dims='time_counter')
    solve_time(outfile, outfile, varname=varname,
               out=' &>> delfiles/del' + varname + freqsource + module)
    return
def read_process_file(mysettings, process_file, year, expcase, study, finfo=None):
    import pandas as pd
    pd.set_option('display.width', 120)
    df = pd.read_table(process_file, skiprows=0, header=0, delim_whitespace=True)
    two_times = df.duplicated(subset='varname-out')
    if True in two_times.values:
        myprint('\n\n\n ============= CHECK your diagnostics file ' + process_file +
                '\n ============= it has repeated final varnames for some diagnostics!!\n\n\n',
                finfo=finfo)
        exit()
    list_vars = []
    for index, row in df.iterrows():
        gc.collect()
        list_vars.append(row['varname-out'])
        if mysettings[study]['preprocess'] == True:
            if row['method'] == 'new_var':
                new_var(mysettings, year, row['varname-source'], row['freq-source'],
                        row['module-IPSL'], row['freq-specific'], row['varname-out'],
                        expcase, study, extra=row['extra-info-source'], index=index,
                        finfo=finfo)
            if row['method'] == 'get_var':
                get_var(mysettings, year, row['varname-source'], row['freq-source'],
                        row['module-IPSL'], row['freq-specific'], row['varname-out'],
                        expcase, study, extra=row['extra-info-source'], index=index,
                        finfo=finfo)
            if row['method'] == 'lev_var':
                lev_var(mysettings, year, row['varname-source'], row['freq-source'],
                        row['module-IPSL'], row['freq-specific'], row['varname-out'],
                        expcase, study, extra=row['extra-info-source'], index=index,
                        finfo=finfo)
    return list_vars
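
# Shape of a whitespace-delimited .process table as read above (the column
# names are exactly those referenced by the code; the rows are hypothetical):
#
#   varname-out  varname-source  freq-source  module-IPSL  freq-specific  extra-info-source  method
#   od550aer     od550aer        MO           ATM          none           none               get_var
#   emiso4       flx_so4         MO           CHM          none           aer.nc             new_var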
def process_files(mysettings, year, expcase, study, proc_file, finfo=None):
    if str(proc_file) != 'none':
        myprint(' ... approx. number of variables: ' +
                str(sum(1 for line in open(proc_file)) - 1), finfo=finfo)
        processed_vars = read_process_file(mysettings, proc_file, year, expcase, study,
                                           finfo=finfo)
    else:
        myprint(' ... No processing ... [process-file==None]', finfo=finfo)
        processed_vars = []
    return processed_vars
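
# Minimal driver sketch (hypothetical study name, settings path and years;
# assumes a settings YAML providing the keys used throughout this module):
if __name__ == '__main__':
    with open('etc/settings.yaml') as f_settings:
        mysettings = yaml.safe_load(f_settings)
    with open('extraction.log', 'w') as finfo:
        for year in ['2000', '2001']:
            process_files(mysettings, year, 'EXP00', 'mystudy',
                          mysettings['mystudy']['process']['mon'], finfo=finfo)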