# Source code for ninolearn.postprocess.prepare

"""
This module is a collection of methods that are needed to convert the raw
data to a same format.
"""

import pandas as pd
from os.path import join, exists
from os import mkdir
import numpy as np
from datetime import datetime
import xarray as xr

from ninolearn.IO import read_raw
from ninolearn.pathes import rawdir, postdir
from ninolearn.IO.read_post import data_reader


# Create the post-processed data directory on import so the prep_* functions
# below can write their output without re-checking for it.
if not exists(postdir):
    print("make a data directory at %s" % postdir)
    mkdir(postdir)


def season_to_month(season):
    """
    Translate a 3-month season string to the integer of the *last* month of
    that season (to ensure not to include any future information when
    predictions are made later with this data).

    :type season: string
    :param season: Season represented by three letters such as 'DJF'
    """
    seasons = ('DJF', 'JFM', 'FMA', 'MAM', 'AMJ', 'MJJ',
               'JJA', 'JAS', 'ASO', 'SON', 'OND', 'NDJ')
    # DJF ends in February (2), ..., OND ends in December (12); NDJ wraps
    # around into January (1) of the following year.
    last_month = dict(zip(seasons, list(range(2, 13)) + [1]))
    return last_month[season]
def season_shift_year(season):
    """
    When the function .season_to_month() is applied, the year related to the
    NDJ season needs to be shifted by 1 (its last month, January, falls into
    the next calendar year).

    :type season: string
    :param season: Season represented by three letters such as 'DJF'
    """
    all_seasons = ('DJF', 'JFM', 'FMA', 'MAM', 'AMJ', 'MJJ',
                   'JJA', 'JAS', 'ASO', 'SON', 'OND', 'NDJ')
    # every season maps to 0 except NDJ, which spills into the next year
    shift = dict.fromkeys(all_seasons, 0)
    shift['NDJ'] = 1
    return shift[season]
def prep_oni():
    """
    Postprocess the ONI time series and write it to ``postdir/oni.csv``.

    Add a time axis corresponding to the first day of the *last* month of a
    3-month season (per season_to_month); e.g. DJF 2019 becomes 2019-02-01.
    Further, rename the anomaly column to 'anom'.
    """
    print("Prepare ONI timeseries.")
    data = read_raw.oni()

    # Assemble a datetime index from the season labels: the season maps to
    # its last month, and NDJ shifts the year forward by one.
    # (Original used data.YR.values/data.YR.values as an obscure way to get
    # an array of ones for the day component.)
    df = {'year': data.YR.values + data.SEAS.apply(season_shift_year).values,
          'month': data.SEAS.apply(season_to_month).values,
          'day': np.ones_like(data.YR.values)}
    dti = pd.to_datetime(df)

    data.index = dti
    data.index.name = 'time'
    data = data.rename(index=str, columns={'ANOM': 'anom'})
    data.to_csv(join(postdir, 'oni.csv'))
def prep_nino_month(index="3.4", detrend=False):
    """
    Postprocess a monthly Nino index time series and write it to a CSV file
    in ``postdir``. Add a time axis corresponding to the first day of the
    central month.

    :param index: Which Nino anomaly column to keep: "1+2", "3", "4" or
        "3.4" (default).
    :param detrend: If True, read the detrended anomalies and append
        "detrend" to the output file name.
    """
    print("Prepare monthly Nino3.4 timeseries.")
    period = "M"
    rawdata = read_raw.nino_anom(index=index, period=period, detrend=detrend)
    rawdata = rawdata.rename(index=str, columns={'ANOM': 'anomNINO1+2',
                                                 'ANOM.1': 'anomNINO3',
                                                 'ANOM.2': 'anomNINO4',
                                                 'ANOM.3': 'anomNINO3.4'})

    # Build a monthly datetime index; the day component is always 1.
    dftime = {'year': rawdata.YR.values,
              'month': rawdata.MON.values,
              'day': np.ones_like(rawdata.YR.values)}
    dti = pd.to_datetime(dftime)

    data = pd.DataFrame(data=rawdata[f"anomNINO{index}"])
    data.index = dti
    data.index.name = 'time'
    data = data.rename(index=str, columns={f'anomNINO{index}': 'anom'})

    filename = f"nino{index}{period}"
    if detrend:
        # BUG FIX: the original ''.join(filename, "detrend") raised
        # TypeError because str.join() takes a single iterable argument.
        filename += "detrend"
    filename += '.csv'
    data.to_csv(join(postdir, filename))
def prep_wwv(cardinal_direction=""):
    """
    Postprocess the Warm Water Volume (WWV) anomaly time series and write it
    to ``postdir/wwv<cardinal_direction>.csv``.

    Add a time axis built from the integer date column (assumed YYYYMM —
    the string slices below rely on that layout; day is always the 1st) and
    rename the anomaly column to 'anom'.

    :param cardinal_direction: optional suffix selecting a regional WWV
        variant (e.g. "west"); empty string for the full basin.
    """
    print(f"Prepare WWV {cardinal_direction} timeseries.")
    data = read_raw.wwv_anom(cardinal_direction=cardinal_direction)

    # Split the YYYYMM date integers into year and month strings.
    # (Original used data.date/data.date as an obscure way to get ones for
    # the day component.)
    df = {'year': data.date.astype(str).str[:4],
          'month': data.date.astype(str).str[4:],
          'day': np.ones_like(data.date.values)}
    dti = pd.to_datetime(df)

    data.index = dti
    data.index.name = 'time'
    data = data.rename(index=str, columns={'Anomaly': 'anom'})
    data.to_csv(join(postdir, f'wwv{cardinal_direction}.csv'))
def prep_K_index():
    """
    Postprocess the Kiritimati (K) index from Bunge and Clarke (2014):
    name the time axis and the value column, then write the series to
    ``postdir/kindex.csv``.
    """
    data = read_raw.K_index()
    data.index.name = 'time'
    data.name = 'anom'
    # plain string literal: the original f-string had no placeholder
    data.to_csv(join(postdir, 'kindex.csv'), header=True)
def prep_wwv_proxy():
    """
    Make a WWV proxy index that uses the K-index from Bunge and Clarke
    (2014) for the time period between 1955 and 1979 and the observed WWV
    from 1980 onwards, and write it to ``postdir/wwv_proxy.csv``.
    """
    reader_wwv = data_reader(startdate='1980-01', enddate='2018-12')
    wwv = reader_wwv.read_csv('wwv')

    reader_kindex = data_reader(startdate='1955-01', enddate='1979-12')
    # Scale the K-index up to the order of magnitude of the WWV.
    # NOTE(review): 10e12 == 1e13 — confirm the intended factor was not 1e12.
    kindex = reader_kindex.read_csv('kindex') * 10e12

    # DataFrame/Series.append was deprecated and removed in pandas 2.0;
    # pd.concat is the supported way to stack the two periods.
    wwv_proxy = pd.concat([kindex, wwv])
    wwv_proxy.to_csv(join(postdir, 'wwv_proxy.csv'), header=True)
def prep_iod():
    """
    Prepare the IOD index dataframe and write it to ``postdir/iod.csv``.
    """
    print("Prepare IOD timeseries.")
    raw = read_raw.iod()

    # Flatten the year-by-month table into one long series (months in
    # chronological order) and mark the -999 sentinel values as missing.
    series = raw.T.unstack().replace(-999, np.nan)

    # Monthly time axis matching the flattened series.
    time_axis = pd.date_range(start='1870-01-01', end='2018-12-01', freq='MS')

    out = pd.DataFrame({'anom': series.values}, index=time_axis)
    out.index.name = 'time'
    out.to_csv(join(postdir, 'iod.csv'))
def calc_warm_pool_edge():
    """
    Calculate the warm pool edge, i.e. the eastern boundary of equatorial
    water warmer than 28°C, from ERSSTv5 SST, and write the result to
    ``postdir/wp_edge.csv``.

    :returns: tuple (warm_pool_edge, indeces) — the edge position per time
        step (presumably in km along the equator; see unit note below) and
        the corresponding longitude grid index of the last >28°C cell.
    """
    reader = data_reader(startdate='1948-01', enddate='2018-12',lon_min=120, lon_max=290)
    sst = reader.read_netcdf('sst', dataset='ERSSTv5', processed='')

    # restrict to the equator
    sst_eq = sst.loc[dict(lat=0)]

    warm_pool_edge = np.zeros(sst_eq.shape[0])
    indeces = np.zeros(sst_eq.shape[0])

    # TODO not very efficent
    for i in range(sst_eq.shape[0]):
        # easternmost longitude index where SST still exceeds 28°C
        index = np.argwhere(sst_eq[i].values>28.).max()
        indeces[i] = index

        # local SST gradient between that cell and its western neighbour
        slope = sst_eq[i, index] - sst_eq[i, index-1]

        # NOTE(review): for a linear interpolation of the 28°C crossing one
        # would expect (sst - 28.) / slope; the multiplication here looks
        # suspicious — confirm against the original derivation before use.
        intercept28C = (sst_eq[i, index] - 28.) * slope + index

        # convert grid index to distance; assumes a 2.5° longitude grid and
        # 111.321 km per degree at the equator — TODO confirm grid spacing
        warm_pool_edge[i] = intercept28C * 2.5 * 111.321

    df = pd.DataFrame(data=warm_pool_edge,index=sst.time.values, columns=['total'])
    df.index.name = 'time'
    df.to_csv(join(postdir, 'wp_edge.csv'))

    return warm_pool_edge, indeces
def prep_other_forecasts():
    """
    Get other forecasts into a decent format.

    Parses a fixed-column text product of third-party ENSO forecasts into an
    xarray Dataset with dimensions (target_season, lead) — one variable per
    model — and saves it to ``postdir/other_forecasts.nc``. Values are
    converted from centi-Kelvin to Kelvin; the -999 sentinel becomes NaN.
    """
    data = read_raw.other_forecasts()
    n_rows = len(data)

    # the first target season is assumed not to change
    first_target_season = '2002-04-01'

    # Scan backwards for the latest "Forecast issued <Mon YYYY>" header to
    # determine the end of the target-season axis.
    for i in reversed(range(n_rows)):
        row_str = data.row[i]
        if row_str[:8] == "Forecast":
            last_issued = datetime.strptime(row_str[16:], '%b %Y')
            break

    # forecasts reach up to 10 months beyond the last issue date
    last_target_season = last_issued + pd.tseries.offsets.MonthBegin(10)

    target_season = pd.date_range(start=first_target_season,
                                  end=last_target_season, freq='MS')
    n_timesteps = len(target_season)

    lead_time = np.arange(0, 9)
    n_lead = len(lead_time)

    # dummy array for the purpose to be filled (NaN = no forecast)
    dummy = np.zeros((n_timesteps, n_lead))
    dummy[:, :] = np.nan

    # j counts completed forecast blocks and hence the time offset of the
    # current block's first target season
    j = 0
    save_data = {}

    for i in range(n_rows):
        try:
            # read the row
            row_str = data.row[i]

            # check if it is the last line of a forecast block
            if row_str[:3] == 'end':
                j += 1

            # check if it is a number otherwise go the except
            # (non-numeric rows — headers, separators — raise ValueError
            # here and are skipped)
            test = int(row_str[0:4])

            # allocate a new entry if model is new; the model name occupies
            # columns 40-52 of the fixed-width row
            model_name = row_str[40:52].strip()
            if model_name not in save_data.keys() and model_name != '':
                save_data[model_name] = dummy.copy()

            # write data to the considered target period: 9 lead times in
            # 4-character-wide columns
            for k in range(9):
                save_data[model_name][j+k, k] = int(row_str[0+k*4: 4+k*4])
        except ValueError:
            pass

    # make the final save dictionary
    save_dict = {}

    # replace model names that have a '/' in their name (otherwise saving as
    # netCDF would not be possible)
    for key in save_data.keys():
        key_save = key.replace('/', ' ')
        save_dict[key_save] = (['target_season', 'lead'], save_data[key])

    # make the final Data set
    ds = xr.Dataset(save_dict, coords={'target_season': target_season,
                                       'lead': lead_time})

    # replace -999 with nans
    ds = ds.where(ds != -999)

    # from unit cK to K
    ds = ds/100

    # save data
    ds.to_netcdf(join(postdir, f'other_forecasts.nc'))