Using the TCN model with multi-step pattern to predict the intensity of the solar cycle¶

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import shutil
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm_notebook as tqdm

from tensorboardX import SummaryWriter
import matplotlib.pyplot as plt

from darts import TimeSeries
from darts.dataprocessing.transformers import Scaler
from darts.models import TransformerModel, ExponentialSmoothing
from darts.metrics import mape
from darts.utils.statistics import check_seasonality, plot_acf
from darts.datasets import AirPassengersDataset, SunspotsDataset
from darts.models import TCNModel, RNNModel
from darts.utils.callbacks import TFMProgressBar
import warnings

warnings.filterwarnings("ignore")
import logging

logging.disable(logging.CRITICAL)

def generate_torch_kwargs():
    # run torch models on CPU, and disable progress bars for all model stages except training.
    return {
        "pl_trainer_kwargs": {
            "accelerator": "cpu",
            "callbacks": [TFMProgressBar(enable_train_bar_only=True)],
        }
    }
In [2]:
from darts.metrics import mape,rmse,mae

load datasets¶

In [3]:
series = SunspotsDataset().load().astype(np.float32)
train, val = series.split_after(0.75)

plt.figure(figsize=(5, 3))
train.plot(label = "Training Data")
val.plot(label='Validation Data')
plt.ylabel('Sunspot Number')
plt.show()

scaler = Scaler()
train_scaled = scaler.fit_transform(train) #training data
val_scaled = scaler.transform(val)       #test data
series_scaled = scaler.transform(series) #all data

print("the 'monthly sunspots' dataset has {} data points".format(len(series_scaled)))
len(series_scaled),len(train_scaled),len(val_scaled)
the 'monthly sunspots' dataset has 3293 data points
Out[3]:
(3293, 2470, 823)

create model and train¶

In [4]:
input_chunk_length =  1200
output_chunk_length = 120
In [5]:
model_name = "TCN_sun_multi"
model = TCNModel(
    input_chunk_length=input_chunk_length,
    output_chunk_length=output_chunk_length,
#     n_epochs=200,
    n_epochs=200,
    dropout=0.4,
    dilation_base=2,
    weight_norm=True,
    kernel_size=3,
    num_filters=6,
    nr_epochs_val_period=1,
    random_state=0,
    save_checkpoints=True,
    model_name=model_name,
    force_reset=True,
    **generate_torch_kwargs()
)
In [6]:
model.fit(
    series = series_scaled,
    verbose=True
)
Training: 0it [00:00, ?it/s]
Out[6]:
TCNModel(output_chunk_shift=0, kernel_size=3, num_filters=6, num_layers=None, dilation_base=2, weight_norm=True, dropout=0.4, input_chunk_length=1200, output_chunk_length=120, n_epochs=200, nr_epochs_val_period=1, random_state=0, save_checkpoints=True, model_name=TCN_sun_multi, force_reset=True, pl_trainer_kwargs={'accelerator': 'cpu', 'callbacks': [<darts.utils.callbacks.TFMProgressBar object at 0x00000217E818CAD0>]})
In [7]:
# best_model = TCNModel.load_from_checkpoint(
#     model_name=model_name, best=True 
# )
best_model = model

Backtest — fitting the model to the entire dataset¶

In [11]:
backtest = best_model.historical_forecasts(
    series=series_scaled,  # input the entire data sequence
    forecast_horizon=output_chunk_length,  # forecast length
    retrain=False,  # retrain the model 
    verbose=True,  # display detailed running information 
)

plt.figure(figsize=(5, 3))
scaler.inverse_transform(series_scaled).plot(label="actual")  # plot the acual value
scaler.inverse_transform(backtest).plot(label="predict")  # plot the predict value
plt.legend()  

print("MAE:", mae(scaler.inverse_transform(backtest), scaler.inverse_transform(series_scaled)))
print("RMSE:", rmse(scaler.inverse_transform(backtest), scaler.inverse_transform(series_scaled))) 
MAE: 18.918198
RMSE: 25.549845

Evaluating the Model¶

In [12]:
cycle15 = "19130801"
cycle16 = "19230801"
cycle17 = "19330901"
cycle18 = "19440201"
cycle19 = "19540401"

cycle20 = "19641001"         # "19641001" ,
cycle21 =  "19760301"        # "19760301",
cycle22 =  "19860901"        # "19860901",
cycle23 = "19960801"         #"19960801"
cycle24 = "20081201"         #"20081201"
cycle25 = "20191201"         #"20191201" 

before_series_cycle25, series_cycle25 = series_scaled.split_after(pd.Timestamp("20191101"))
before_series_cycle24, series_cycle24 = before_series_cycle25.split_after(pd.Timestamp("20081101"))
before_series_cycle23, series_cycle23 = before_series_cycle24.split_after(pd.Timestamp("19960701"))
before_series_cycle22, series_cycle22 = before_series_cycle23.split_after(pd.Timestamp("19860801"))
before_series_cycle21, series_cycle21 = before_series_cycle22.split_after(pd.Timestamp("19760201"))
before_series_cycle20, series_cycle20 = before_series_cycle21.split_after(pd.Timestamp("19640901"))

before_series_cycle19, series_cycle19 = before_series_cycle20.split_after(pd.Timestamp("19540301"))
before_series_cycle18, series_cycle18 = before_series_cycle19.split_after(pd.Timestamp("19440101"))
before_series_cycle17, series_cycle17 = before_series_cycle18.split_after(pd.Timestamp("19330801"))
before_series_cycle16, series_cycle16 = before_series_cycle17.split_after(pd.Timestamp("19230701"))
before_series_cycle15, series_cycle15 = before_series_cycle16.split_after(pd.Timestamp("19130701"))

plt.figure(figsize=(5, 3))
scaler.inverse_transform(series_scaled).plot(label="actual")  # plot the value for entire datas
scaler.inverse_transform(series_cycle25).plot(label="series_cycle25")  # plot the value for sc25
scaler.inverse_transform(series_cycle24).plot(label="series_cycle24")  # plot the value for sc24
scaler.inverse_transform(series_cycle23).plot(label="series_cycle23")  # plot the value for sc23
scaler.inverse_transform(series_cycle22).plot(label="series_cycle22")  # plot the value for sc22
scaler.inverse_transform(series_cycle21).plot(label="series_cycle21") 
scaler.inverse_transform(series_cycle20).plot(label="series_cycle20")  

scaler.inverse_transform(series_cycle19).plot(label="sc19")  
scaler.inverse_transform(series_cycle18).plot(label="sc18")  
scaler.inverse_transform(series_cycle17).plot(label="sc17")  
scaler.inverse_transform(series_cycle16).plot(label="sc16")  
scaler.inverse_transform(series_cycle15).plot(label="sc15")  
plt.legend()
Out[12]:
<matplotlib.legend.Legend at 0x21795dbf890>
In [13]:
def eval_model(model,before_series,series,title):
    pred_series = model.predict(
        n=len(series),
        series = before_series
    )   
    
    series = scaler.inverse_transform(series)
    pred_series = scaler.inverse_transform(pred_series)
    plt.figure(figsize=(5, 3))
    
    print("MAE: ",mae(pred_series, series))
    print("RMSE: ",rmse(pred_series, series))
    print(len(pred_series),len(series))
    
    series.plot(label="Actual")
    pred_series.plot(label="Predict")
    plt.title(title)
    plt.legend()
    plt.show()
    
    print(series.time_index[0], pred_series.time_index[0])
In [14]:
eval_model(best_model, before_series_cycle15,series_cycle15,"Solar Cycle15")
MAE:  18.684488
RMSE:  26.501322
120 120
1913-08-01 00:00:00 1913-08-01 00:00:00
In [15]:
eval_model(best_model, before_series_cycle16,series_cycle16,"Solar Cycle16")
MAE:  18.600054
RMSE:  19.849741
121 121
1923-08-01 00:00:00 1923-08-01 00:00:00
In [16]:
eval_model(best_model, before_series_cycle17,series_cycle17,"Solar Cycle17")
MAE:  14.887585
RMSE:  18.96901
125 125
1933-09-01 00:00:00 1933-09-01 00:00:00
In [17]:
eval_model(best_model, before_series_cycle18,series_cycle18,"Solar Cycle18")
MAE:  14.8080435
RMSE:  16.615202
122 122
1944-02-01 00:00:00 1944-02-01 00:00:00
In [18]:
eval_model(best_model, before_series_cycle19,series_cycle19,"Solar Cycle19")
MAE:  28.659431
RMSE:  36.10116
126 126
1954-04-01 00:00:00 1954-04-01 00:00:00
In [19]:
eval_model(best_model, before_series_cycle20,series_cycle20,"Solar Cycle20")
MAE:  8.651718
RMSE:  11.246224
137 137
1964-10-01 00:00:00 1964-10-01 00:00:00
In [20]:
eval_model(best_model, before_series_cycle21,series_cycle21,"Solar Cycle21")
MAE:  29.228825
RMSE:  35.828457
126 126
1976-03-01 00:00:00 1976-03-01 00:00:00
In [21]:
eval_model(best_model, before_series_cycle22,series_cycle22,"Solar Cycle22")
MAE:  25.150364
RMSE:  28.17766
119 119
1986-09-01 00:00:00 1986-09-01 00:00:00
In [22]:
eval_model(best_model, before_series_cycle23,series_cycle23,"Solar Cycle23")
MAE:  11.6845455
RMSE:  17.102806
148 148
1996-08-01 00:00:00 1996-08-01 00:00:00
In [23]:
eval_model(best_model, before_series_cycle24,series_cycle24,"Solar Cycle24")
MAE:  28.078577
RMSE:  35.904285
132 132
2008-12-01 00:00:00 2008-12-01 00:00:00
In [24]:
eval_model(best_model, before_series_cycle25,series_cycle25,"Solar Cycle25")
MAE:  8.285022
RMSE:  10.659435
48 48
2019-12-01 00:00:00 2019-12-01 00:00:00

fitting result for solar cycle from 15-25¶

In [25]:
def backtest(testing_model,cycle, series_cycle,title):
    backtest = testing_model.historical_forecasts(
        series=series_scaled,  # input the entire data sequence
        start=pd.Timestamp(cycle),  # start time
        forecast_horizon=output_chunk_length,  # forecast length
        retrain=False,  #  retrain the model
        verbose=True,  # display detailed running information
    )
    backtest = backtest[0:len(series_cycle)]
    
    series_cycle = scaler.inverse_transform(series_cycle)
    backtest = scaler.inverse_transform(backtest)
    print("MAE:", mae(backtest,series_cycle)) 
    print("RMSE:", rmse(backtest,series_cycle))
    
    plt.figure(figsize=(5, 3))
    series_cycle.plot(label="Actual")  # plot the actual value
    backtest.plot(label="Fitting")  # plot the predict value
    plt.title(title)
    plt.legend()  
    print(len(backtest),len(series_cycle))
    print(backtest.time_index[0],series_cycle.time_index[0])

    return backtest
In [26]:
pred_series_cycle15 = backtest(best_model,"19030901",series_cycle15,"Solar Cycle15") 
MAE: 18.662493
RMSE: 26.491293
120 120
1913-08-01 00:00:00 1913-08-01 00:00:00
In [27]:
pred_series_cycle16 = backtest(best_model,"19130901",series_cycle16,"Solar Cycle16") 
MAE: 18.621532
RMSE: 19.866823
121 121
1923-08-01 00:00:00 1923-08-01 00:00:00
In [28]:
pred_series_cycle17 = backtest(best_model,"19231001",series_cycle17,"Solar Cycle17") 
MAE: 14.719825
RMSE: 18.904537
125 125
1933-09-01 00:00:00 1933-09-01 00:00:00
In [29]:
pred_series_cycle18 = backtest(best_model,"19340301",series_cycle18,"Solar Cycle18") 
MAE: 14.82986
RMSE: 16.614609
122 122
1944-02-01 00:00:00 1944-02-01 00:00:00
In [30]:
pred_series_cycle19 = backtest(best_model,"19440501",series_cycle19,"Solar Cycle19") 
MAE: 28.354893
RMSE: 36.033733
126 126
1954-04-01 00:00:00 1954-04-01 00:00:00
In [31]:
pred_series_cycle20 = backtest(best_model,"19541101",series_cycle20,"Solar Cycle20")  
MAE: 8.622017
RMSE: 11.2970915
137 137
1964-10-01 00:00:00 1964-10-01 00:00:00
In [32]:
pred_series_cycle21 = backtest(best_model,"19660401",series_cycle21,"Solar Cycle21") 
MAE: 28.949572
RMSE: 35.752636
126 126
1976-03-01 00:00:00 1976-03-01 00:00:00
In [33]:
pred_series_cycle22 = backtest(best_model,"19761001", series_cycle22,"Solar Cycle22")
MAE: 25.193789
RMSE: 28.231571
119 119
1986-09-01 00:00:00 1986-09-01 00:00:00
In [34]:
pred_series_cycle23 = backtest(best_model,"19860901",series_cycle23,"Solar Cycle23") 
MAE: 12.342292
RMSE: 18.46485
148 148
1996-08-01 00:00:00 1996-08-01 00:00:00
In [35]:
pred_series_cycle24 = backtest(best_model,"19990101",series_cycle24,"Solar Cycle24")
MAE: 25.132095
RMSE: 34.449017
132 132
2008-12-01 00:00:00 2008-12-01 00:00:00
In [36]:
pred_series_cycle25 = backtest(best_model,"20100101", series_cycle25,"Solar Cycle25")
MAE: 8.385198
RMSE: 10.805431
48 48
2019-12-01 00:00:00 2019-12-01 00:00:00

Predict the solar cycle 25¶

In [37]:
def predict(model, n, series,val_scaled):
    pred_series = model.predict(n=n, series= series) 
    
    pred_series = scaler.inverse_transform(pred_series)
    val_scaled = scaler.inverse_transform(val_scaled)
    
    plt.figure(figsize=(5, 3))
    val_scaled.plot(label="Actual")
    pred_series.plot(label="Forecast Solar Cycle25")
    plt.legend()
    plt.show()
    
    return pred_series
    
pred_series_cycle = predict(best_model, 90, series_scaled,val_scaled)
pred_series_cycle.time_index[0]
Out[37]:
Timestamp('2023-12-01 00:00:00')
In [38]:
darts_to_pd = TimeSeries.pd_dataframe(pred_series_cycle) 

max_date = darts_to_pd['Sunspots'].idxmax() #date
print("The date when the peak solar activity occurs:",max_date)
max_value = darts_to_pd.loc[max_date]
print("The peak value :",max_value['Sunspots'])

min_date = darts_to_pd['Sunspots'].idxmin() #date
print("The date when solar cycle 25 ends:",min_date)
min_value = darts_to_pd.loc[min_date]
The date when the peak solar activity occurs: 2024-04-01 00:00:00
The peak value : 120.59972
The date when solar cycle 25 ends: 2030-04-01 00:00:00
In [39]:
plt.figure(figsize=(5, 3))
scaler.inverse_transform(series_cycle25).plot()
pred_series_cycle.plot()
Out[39]:
<Axes: xlabel='Month'>
In [ ]:
 
In [ ]: