"""
This module collects a number of methods to read the raw data files.
"""
from os.path import join

import pandas as pd
import xarray as xr
from scipy.io import loadmat

from ninolearn.pathes import rawdir
def nino34_anom():
"""
Get the Nino3.4 Index anomaly.
"""
data = pd.read_csv(join(rawdir, "nino34.txt"), delim_whitespace=True)
return data
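# Example usage (a sketch, assuming the raw file "nino34.txt" is present in
# `rawdir`):
#
#     nino34 = nino34_anom()
#     print(nino34.head())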
def oni():
    """
    Get the Oceanic Nino Index (ONI), i.e. the seasonal Nino3.4 index.
    """
    return nino_anom(index="3.4", period="S")
def nino_anom(index="3.4", period="S", detrend=False):
    """
    Read various Nino indices from the raw data directory.

    :param index: The Nino index (default "3.4").
    :param period: "S" for seasonal, "M" for monthly records.
    :param detrend: If True, read the detrended monthly Nino3.4 index.
    """
try:
if period == "S":
if index == "3.4" and not detrend:
data = pd.read_csv(join(rawdir, "oni.txt"),
delim_whitespace=True)
else:
msg = "Only not detrended Nino3.4 index is available for seasonal records"
raise Exception(msg)
elif period == "M":
if detrend and index == "3.4":
data = pd.read_csv(join(rawdir, "nino34detrend.txt"),
delim_whitespace=True)
elif not detrend:
data = pd.read_csv(join(rawdir, "nino_1_4.txt"),
delim_whitespace=True)
return data
except UnboundLocalError:
raise Exception("The desired NINO index is not available.")
def wwv_anom(cardinal_direction=""):
    """
    Get the warm water volume (WWV) anomaly.

    :param cardinal_direction: If provided, read the WWV anomaly of the
        corresponding cardinal direction.
    """
if cardinal_direction != "":
filename = f"wwv_{cardinal_direction}.dat"
else:
filename = "wwv.dat"
data = pd.read_csv(join(rawdir, filename),
delim_whitespace=True, header=4)
return data
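# Example usage (a sketch): the basin-wide WWV anomaly from "wwv.dat" and a
# directional variant, assuming a file such as "wwv_west.dat" exists in
# `rawdir` (the suffix "west" is only an illustration):
#
#     wwv = wwv_anom()
#     wwv_west = wwv_anom(cardinal_direction="west")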
def iod():
    """
    Get the Indian Ocean Dipole (IOD) index data.
    """
data = pd.read_csv(join(rawdir, "iod.txt"),
delim_whitespace=True, header=None, skiprows=1, skipfooter=7,
index_col=0, engine='python')
return data
def K_index():
    """
    Get the K-index monthly anomalies from the file Kindex.mat and return
    them as a monthly pandas Series covering January 1955 to December 2011.
    """
    data = loadmat(join(rawdir, "Kindex.mat"))
    kindex = data['Kindex2_mon_anom'][:, 0]

    time = pd.date_range(start='1955-01-01', end='2011-12-01', freq='MS')
    ds = pd.Series(data=kindex, index=time)
    return ds
def sst_HadISST():
    """
    Get the sea surface temperature from the HadISST data set and directly
    manipulate the time axis such that the monthly mean values are assigned
    to the beginning of each month, as this is the default for the other
    data sets.
    """
data = xr.open_dataset(join(rawdir, "HadISST_sst.nc"))
maxtime = pd.to_datetime(data.time.values.max()).date()
data['time'] = pd.date_range(start='1870-01-01', end=maxtime, freq='MS')
data.sst.attrs['dataset'] = 'HadISST'
return data.sst
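# Example usage (a sketch): the HadISST SST field is returned as an
# xarray.DataArray with monthly values stamped to the first of each month,
# assuming "HadISST_sst.nc" is present in `rawdir`:
#
#     sst = sst_HadISST()
#     print(sst.time[:3].values)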
def ustr():
"""
get u-wind stress from ICOADS 1-degree Enhanced
"""
data = xr.open_dataset(join(rawdir, "upstr.mean.nc"))
data.upstr.attrs['dataset'] = 'ICOADS'
return data.upstr
def uwind():
"""
get u-wind from NCEP/NCAR reanalysis
"""
data = xr.open_dataset(join(rawdir, "uwnd.mon.mean.nc"))
data.uwnd.attrs['dataset'] = 'NCEP'
return data.uwnd
def vwind():
"""
get v-wind from NCEP/NCAR reanalysis
"""
data = xr.open_dataset(join(rawdir, "vwnd.mon.mean.nc"))
data.vwnd.attrs['dataset'] = 'NCEP'
return data.vwnd
def sat(mean='monthly'):
"""
Get the surface air temperature from NCEP/NCAR Reanalysis
:param mean: Choose between daily and monthly mean fields
"""
if mean == 'monthly':
data = xr.open_dataset(join(rawdir, "air.mon.mean.nc"))
data.air.attrs['dataset'] = 'NCEP'
return data.air
elif mean == 'daily':
data = xr.open_mfdataset(join(rawdir, 'sat', '*.nc'))
data_return = data.air
data_return.attrs['dataset'] = 'NCEP'
data_return.name = 'air_daily'
return data_return
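# Example usage (a sketch): monthly versus daily surface air temperature,
# assuming "air.mon.mean.nc" and the daily files in the "sat" subdirectory
# are present in `rawdir`:
#
#     sat_monthly = sat(mean='monthly')
#     sat_daily = sat(mean='daily')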
def olr():
    """
    Get the outgoing longwave radiation (OLR).
    """
data = xr.open_dataset(join(rawdir, "olr.mon.mean.nc"))
data.olr.attrs['dataset'] = 'NCAR'
return data.olr
def ssh():
    """
    Get the sea surface height (ORAP5) and adjust some attributes and
    coordinate names.
    """
data = xr.open_mfdataset(join(rawdir, 'ssh', '*.nc'),
concat_dim='time_counter')
data_return = data.sossheig.rename({'time_counter': 'time'})
maxtime = pd.to_datetime(data_return.time.values.max()).date()
data_return['time'] = pd.date_range(start='1979-01-01',
end=maxtime,
freq='MS')
data_return.attrs['dataset'] = 'ORAP5'
data_return.name = 'ssh'
return data_return
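# Example usage (a sketch): the ORAP5 sea surface height with the renamed
# 'time' coordinate, assuming the monthly files in the "ssh" subdirectory of
# `rawdir` are present:
#
#     ssh_orap5 = ssh()
#     print(ssh_orap5.dims)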
def godas(variable="sshg"):
    """
    Get a variable from the GODAS reanalysis.

    :param variable: The name of the variable (default "sshg").
    """
    ds = xr.open_mfdataset(join(rawdir, f'{variable}_godas', '*.nc'),
                           concat_dim='time')

    # for 4-dimensional variables, select the slice at level 5
    if len(ds[variable].shape) == 4:
        data = ds.loc[dict(level=5)].load()
    else:
        data = ds.load()

    data[variable].attrs['dataset'] = 'GODAS'
    return data[variable]
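# Example usage (a sketch): reading GODAS variables, assuming subdirectories
# such as "sshg_godas" and "uflx_godas" exist in `rawdir` ("uflx" is only an
# illustrative variable name):
#
#     sshg = godas(variable="sshg")
#     uflx = godas(variable="uflx")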
def oras4():
    """
    Get the sea surface height (zos) from the ORAS4 reanalysis.
    """
    ds = xr.open_mfdataset(join(rawdir, 'ssh_oras4', '*.nc'),
                           concat_dim='time')
    data = ds.load()
    data.zos.attrs['dataset'] = 'ORAS4'
    return data.zos
def sat_gfdl():
    """
    Get the surface air temperature (tas) from the GFDL-CM3 simulation.
    """
    data = xr.open_mfdataset(join(rawdir, 'sat_gfdl', '*.nc'),
                             concat_dim='time')
    data = data.load()
    data.tas.attrs['dataset'] = 'GFDL-CM3'

    # reassign the time axis to prevent an OutOfBoundsDatetime error
    data['time'] = pd.date_range(start='1700-01-01', end='2199-12-01', freq='MS')
    return data.tas
def ssh_gfdl():
    """
    Get the sea surface height (zos) from the GFDL-CM3 simulation.
    """
    data = xr.open_mfdataset(join(rawdir, 'ssh_gfdl', '*.nc'),
                             concat_dim='time')
    # data = data.load()
    data.zos.attrs['dataset'] = 'GFDL-CM3'

    # reassign the time axis to prevent an OutOfBoundsDatetime error
    data['time'] = pd.date_range(start='1700-01-01', end='2199-12-01', freq='MS')
    return data.zos
def sst_gfdl():
    """
    Get the sea surface temperature (tos) from the GFDL-CM3 simulation.
    """
    data = xr.open_mfdataset(join(rawdir, 'sst_gfdl', '*.nc'),
                             concat_dim='time')
    # data = data.load()
    data.tos.attrs['dataset'] = 'GFDL-CM3'

    # reassign the time axis to prevent an OutOfBoundsDatetime error
    data['time'] = pd.date_range(start='1700-01-01', end='2199-12-01', freq='MS')
    return data.tos
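# Example usage (a sketch): the GFDL-CM3 fields share a reassigned monthly
# time axis from 1700-01 to 2199-12, assuming the corresponding "*_gfdl"
# subdirectories exist in `rawdir`:
#
#     tas = sat_gfdl()
#     zos = ssh_gfdl()
#     tos = sst_gfdl()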
def hca_mon():
    """
    Get the heat content anomaly, assign the seasonal values to the first
    day of the middle month of each season and upsample the data to monthly
    resolution.
    """
data = xr.open_dataset(join(rawdir, "hca.nc"), decode_times=False)
data['time'] = pd.date_range(start='1955-02-01', end='2019-02-01', freq='3MS')
data.h18_hc.attrs['dataset'] = 'NODC'
data_raw = data.h18_hc[:,0,:,:]
data_upsampled = data_raw.resample(time='MS').interpolate('linear')
data_upsampled.name = 'hca'
return data_upsampled
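# Example usage (a sketch): the seasonal heat content anomaly upsampled to
# monthly resolution, assuming "hca.nc" is present in `rawdir`:
#
#     hca = hca_mon()
#     print(hca.time[:4].values)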
def other_forecasts():
    """
    Read the other_forecasts.csv file. Each line is read as a raw string
    into the 'row' column.
    """
    data = pd.read_csv(join(rawdir, "other_forecasts.csv"), error_bad_lines=False,
                       header=None, names=['row'], delimiter=';')
    return data