import numpy as np
import pandas as pd
from ninolearn.utils import print_header
from ninolearn.IO.read_processed import data_reader
from ninolearn.learn.skillMeasures import seasonal_correlation
from ninolearn.learn.skillMeasures import mean_srmse, seasonal_srmse
from ninolearn.learn.skillMeasures import nll_gaussian
from ninolearn.learn.fit import lead_times, n_lead, decades, n_decades
from scipy.stats import pearsonr
# =============================================================================
# ALL SEASONS EVALUATION
# =============================================================================
[docs]def evaluation_nll(model_name, mean_name = 'mean', std_name='std'):
"""
Evaluate the model using the negativ log-likelihood skill for the full time series.
"""
reader = data_reader(startdate='1963-01', enddate='2017-12')
# scores for the full timeseries
nll = np.zeros(n_lead)
# ONI observation
obs = reader.read_csv('oni')
for i in range(n_lead):
pred_all = reader.read_forecasts(model_name, lead_times[i])
mean = pred_all[mean_name]
std = pred_all[std_name]
# calculate all seasons scores
nll[i] = nll_gaussian(obs, mean, std)
return nll
[docs]def evaluation_correlation(model_name, variable_name = 'mean'):
"""
Evaluate the model using the correlation skill for the full time series.
:type model_name: str
:param model_name: The name of the model.
:type variable_name: str
:param variable_name: The name of the variable which shell be evaluated\
against the ONI prediction.
:returns: The correlation skill for the 0, 3, 6, 9, 12 and 15-month lead\
time and the corresponding p values.
"""
reader = data_reader(startdate='1963-01', enddate='2017-12')
# scores for the full timeseries
r = np.zeros(n_lead)
p = np.zeros(n_lead)
# ONI observation
obs = reader.read_csv('oni')
for i in range(n_lead):
pred_all = reader.read_forecasts(model_name, lead_times[i])
pred = pred_all[variable_name]
# calculate all seasons scores
r[i], p[i] = pearsonr(obs, pred)
return r, p
[docs]def evaluation_srmse(model_name, variable_name = 'mean'):
"""
Evaluate the model using the standardized root-mean-squarred error (SRMSE)
for the full time series. Standardized means that the the the RMSE of each
season is divided by the corresponding standard deviation of the ONI in
that season (standard deviation has a seasonal cycle). Then, these
seasonal SRMSE averaged to get the SRMSE of the full time series..
:type model_name: str
:param model_name: The name of the model.
:type variable_name: str
:param variable_name: The name of the variable which shell be evaluated\
against the ONI prediction.
:returns: The standardized RMSE for the 0, 3, 6, 9, 12 and 15-month lead\
time.
"""
reader = data_reader(startdate='1963-01', enddate='2017-12')
# scores for the full timeseries
srmse = np.zeros(n_lead)
# ONI observation
obs = reader.read_csv('oni')
for i in range(n_lead):
pred_all = reader.read_forecasts(model_name, lead_times[i])
pred = pred_all[variable_name]
srmse[i] = mean_srmse(obs, pred, obs.index - pd.tseries.offsets.MonthBegin(1))
return srmse
# =============================================================================
# DECADAL EVALUATION
# =============================================================================
[docs]def evaluation_decadal_nll(model_name, mean_name = 'mean', std_name='std'):
"""
Evaluate the model in the decades 1963-1971, 1972-1981, ..., 2012-2017 \
using the negative log-likelihood.
"""
reader = data_reader(startdate='1963-01', enddate='2017-12')
# decadal scores
decadal_nll = np.zeros((n_lead, n_decades-1))
# ONI observation
obs = reader.read_csv('oni')
obs_time = obs.index
for i in range(n_lead):
pred_all = reader.read_forecasts(model_name, lead_times[i])
pred_mean = pred_all[mean_name]
pred_std = pred_all[std_name]
for j in range(n_decades-1):
indeces = (obs_time>=f'{decades[j]}-01-01') & (obs_time<=f'{decades[j+1]}-12-01')
decadal_nll[i, j] = nll_gaussian(obs[indeces], pred_mean[indeces], pred_std[indeces])
return decadal_nll
[docs]def evaluation_decadal_correlation(model_name, variable_name = 'mean'):
"""
Evaluate the model in the decades 1963-1971, 1972-1981, ..., 2012-2017 using the correlation skill-
:type model_name: str
:param model_name: The name of the model.
:type variable_name: str
:param variable_name: The name of the variable which shell be evaluated\
against the ONI prediction.
:returns: The correlation skill for the 0, 3, 6, 9, 12 and 15-month lead\
time and the corresponding p values for the respective decades. The\
returned arrays have the shape (lead time, decades).
"""
reader = data_reader(startdate='1963-01', enddate='2017-12')
# decadal scores
decadal_r = np.zeros((n_lead, n_decades-1))
decadal_p = np.zeros((n_lead, n_decades-1))
# ONI observation
obs = reader.read_csv('oni')
obs_time = obs.index
for i in range(n_lead):
pred_all = reader.read_forecasts(model_name, lead_times[i])
pred = pred_all[variable_name]
for j in range(n_decades-1):
indeces = (obs_time>=f'{decades[j]}-01-01') & (obs_time<=f'{decades[j+1]-1}-12-01')
decadal_r[i, j], decadal_p[i, j] = pearsonr(obs[indeces].values, pred[indeces].values)
return decadal_r, decadal_p
[docs]def evaluation_decadal_srmse(model_name, variable_name = 'mean', decadal=None):
"""
Evaluate the model in the decades 1963-1971, 1972-1981, ..., 2012-2017 \
using the standardized RMSE.
:type model_name: str
:param model_name: The name of the model.
:type variable_name: str
:param variable_name: The name of the variable which shell be evaluated\
against the ONI prediction.
:returns: The SRMSE for the 0, 3, 6, 9, 12 and 15-month lead\
time respective decades. The returned array has the shape (lead time, \
decades).
"""
reader = data_reader(startdate='1963-01', enddate='2017-12')
# decadal scores
decadal_srmse = np.zeros((n_lead, n_decades-1))
# ONI observation
obs = reader.read_csv('oni')
obs_time = obs.index
for i in range(n_lead):
pred_all = reader.read_forecasts(model_name, lead_times[i])
pred = pred_all[variable_name]
for j in range(n_decades-1):
indeces = (obs_time>=f'{decades[j]}-01-01') & (obs_time<=f'{decades[j+1]}-12-01')
decadal_srmse[i, j] = mean_srmse(obs[indeces], pred[indeces],
obs.index[indeces] - pd.tseries.offsets.MonthBegin(1))
return decadal_srmse
# =============================================================================
# SEASONAL EVALUATION
# =============================================================================
[docs]def evaluation_seasonal_correlation(model_name, variable_name='mean', background='all'):
"""
Evaluate the model in different seasons using the correlation skill.
:type model_name: str
:param model_name: The name of the model.
:type variable_name: str
:param variable_name: The name of the variable which shell be evaluated\
against the ONI prediction.
:returns: The correlation skill for different seasons and the \
0, 3, 6, 9, 12 and 15-month lead time and the corresponding p values for\
the respective seasons and lead times. The returned arrays have the shape \
(lead time, season). The season corresponding to the the array entry [:,0]\
is DJF and to [:,1] is JFM (and so on).
"""
reader = data_reader(startdate='1963-01', enddate='2017-12')
# seasonal scores
seasonal_r = np.zeros((n_lead, 12))
seasonal_p = np.zeros((n_lead, 12))
# ONI observation
oni = reader.read_csv('oni')
if background=="el-nino-like":
obs = oni[(oni.index.year>=1982)&(oni.index.year<=2001)]
elif background=="la-nina-like":
obs = oni[(oni.index.year<1982)|(oni.index.year>2001)]
elif background=="all":
obs = oni
obs_time = obs.index
for i in range(n_lead):
pred_all = reader.read_forecasts(model_name, lead_times[i]).loc[{'target_season':obs_time}]
pred = pred_all[variable_name]
seasonal_r[i, :], seasonal_p[i, :] = seasonal_correlation(obs, pred, obs_time - pd.tseries.offsets.MonthBegin(1))
return seasonal_r, seasonal_p
[docs]def evaluation_seasonal_srmse(model_name, variable_name='mean', background='all'):
"""
Evaluate the model in different seasons using the standardized RMSE.
:type model_name: str
:param model_name: The name of the model.
:type variable_name: str
:param variable_name: The name of the variable which shell be evaluated\
against the ONI prediction.
:returns: The SRMSE for different seasons and the \
0, 3, 6, 9, 12 and 15-month lead times. The returned arrays have the shape \
(lead time, season). The season corresponding to the the array entry [:,0]\
is DJF and to [:,1] is JFM (and so on).
"""
reader = data_reader(startdate='1963-01', enddate='2017-12')
# seasonal scores
seas_srmse = np.zeros((n_lead, 12))
# ONI observation
oni = reader.read_csv('oni')
if background=="el-nino-like":
obs = oni[(oni.index.year>=1982)&(oni.index.year<=2001)]
elif background=="la-nina-like":
obs = oni[(oni.index.year<1982)|(oni.index.year>2001)]
elif background=="all":
obs = oni
obs_time = obs.index
for i in range(n_lead):
pred_all = reader.read_forecasts(model_name, lead_times[i]).loc[{'target_season':obs_time}]
pred = pred_all[variable_name]
seas_srmse[i, :] = seasonal_srmse(obs, pred, obs_time - pd.tseries.offsets.MonthBegin(1))
return seas_srmse