Using the TCN model with one-step pattern to predict the intensity of the solar cycle¶

In [ ]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import shutil
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm_notebook as tqdm

from tensorboardX import SummaryWriter
import matplotlib.pyplot as plt

from darts import TimeSeries
from darts.dataprocessing.transformers import Scaler
from darts.models import TransformerModel, ExponentialSmoothing
from darts.metrics import mape
from darts.utils.statistics import check_seasonality, plot_acf
from darts.datasets import AirPassengersDataset, SunspotsDataset
from darts.models import TCNModel, RNNModel
from darts.utils.callbacks import TFMProgressBar
import warnings

warnings.filterwarnings("ignore")
import logging

logging.disable(logging.CRITICAL)

def generate_torch_kwargs():
    # run torch models on CPU, and disable progress bars for all model stages except training.
    return {
        "pl_trainer_kwargs": {
            "accelerator": "cpu",
            "callbacks": [TFMProgressBar(enable_train_bar_only=True)],
        }
    }
In [38]:
from darts.metrics import mape,rmse,mae

load datasets¶

In [3]:
series = SunspotsDataset().load().astype(np.float32)

train, val = series.split_after(0.75)
train.plot(label = "Training Data")
val.plot(label='Validation Data')
plt.show()

scaler = Scaler()
train_scaled = scaler.fit_transform(train) #training data
val_scaled = scaler.transform(val)       #test data
series_scaled = scaler.transform(series) #all data

print("the 'monthly sunspots' dataset has {} data points".format(len(series_scaled)))
the 'monthly sunspots' dataset has 3293 data points

create model and train¶

In [4]:
input_chunk_length = 20
In [5]:
model_name = "TCN_sun_one"
model = TCNModel(
    input_chunk_length=input_chunk_length,
    output_chunk_length=1,
    n_epochs=30,
    dropout=0,
    dilation_base=2,
    weight_norm=True,
    kernel_size=3,
    num_filters=6,
    nr_epochs_val_period=1,
    random_state=0,
    save_checkpoints=True,
    model_name=model_name,
    force_reset=True,
    **generate_torch_kwargs()
)
In [7]:
model.fit(
    series=train_scaled, 
    val_series=val_scaled, 
    verbose=True
)
Training: 0it [00:00, ?it/s]
Out[7]:
TCNModel(output_chunk_shift=0, kernel_size=3, num_filters=6, num_layers=None, dilation_base=2, weight_norm=True, dropout=0, input_chunk_length=20, output_chunk_length=1, n_epochs=30, nr_epochs_val_period=1, random_state=0, save_checkpoints=True, model_name=TCN_sun_one, force_reset=True, pl_trainer_kwargs={'accelerator': 'cpu', 'callbacks': [<darts.utils.callbacks.TFMProgressBar object at 0x000002D3FD01DED0>]})
In [8]:
best_model = TCNModel.load_from_checkpoint(
    model_name=model_name, best=True 
)

Backtest — fitting the model to the entire dataset¶

In [9]:
backtest = best_model.historical_forecasts(
    series=series_scaled,  # input the entire data sequence
    forecast_horizon=120,  # forcast length
    retrain=False,  # retrain the model
    verbose=True,  # display the running information
)

plt.figure(figsize=(5, 3))
scaler.inverse_transform(series_scaled).plot(label="actual")  # plot the actual value
scaler.inverse_transform(backtest).plot(label="mape:{:.2f}".format(mape(backtest, series_scaled)))  # plot the predict value
plt.legend()  

print("RMSE:", rmse(scaler.inverse_transform(series_scaled),scaler.inverse_transform(backtest)))
print("MAE:", mae(scaler.inverse_transform(series_scaled),scaler.inverse_transform(backtest)))
RMSE: 58.841866
MAE: 42.917446

Evaluating the Model¶

In [10]:
cycle15 = "19130801"
cycle16 = "19230801"
cycle17 = "19330901"
cycle18 = "19440201"
cycle19 = "19540401"

cycle20 = "19641001"         # "19641001" ,
cycle21 =  "19760301"        # "19760301",
cycle22 =  "19860901"        # "19860901",
cycle23 = "19960801"         #"19960801"
cycle24 = "20081201"         #"20081201"
cycle25 = "20191201"         #"20191201" 

before_series_cycle25, series_cycle25 = series_scaled.split_after(pd.Timestamp("20191101"))
before_series_cycle24, series_cycle24 = before_series_cycle25.split_after(pd.Timestamp("20081101"))
before_series_cycle23, series_cycle23 = before_series_cycle24.split_after(pd.Timestamp("19960701"))
before_series_cycle22, series_cycle22 = before_series_cycle23.split_after(pd.Timestamp("19860801"))
before_series_cycle21, series_cycle21 = before_series_cycle22.split_after(pd.Timestamp("19760201"))
before_series_cycle20, series_cycle20 = before_series_cycle21.split_after(pd.Timestamp("19640901"))

before_series_cycle19, series_cycle19 = before_series_cycle20.split_after(pd.Timestamp("19540301"))
before_series_cycle18, series_cycle18 = before_series_cycle19.split_after(pd.Timestamp("19440101"))
before_series_cycle17, series_cycle17 = before_series_cycle18.split_after(pd.Timestamp("19330801"))
before_series_cycle16, series_cycle16 = before_series_cycle17.split_after(pd.Timestamp("19230701"))
before_series_cycle15, series_cycle15 = before_series_cycle16.split_after(pd.Timestamp("19130701"))

plt.figure(figsize=(5, 3))
scaler.inverse_transform(series_scaled).plot(label="actual")  # plot the value for entire datas
scaler.inverse_transform(series_cycle25).plot(label="sc25")  # plot the value for sc25
scaler.inverse_transform(series_cycle24).plot(label="sc24")  # plot the value for sc24
scaler.inverse_transform(series_cycle23).plot(label="sc23")  # plot the value for sc23
scaler.inverse_transform(series_cycle22).plot(label="sc22")  # plot the value for sc22
scaler.inverse_transform(series_cycle21).plot(label="sc21")  # plot the value for sc21
scaler.inverse_transform(series_cycle20).plot(label="sc20")  # plot the value for sc20

scaler.inverse_transform(series_cycle19).plot(label="sc19")  # plot the value for sc19
scaler.inverse_transform(series_cycle18).plot(label="sc18")  # plot the value for sc18
scaler.inverse_transform(series_cycle17).plot(label="sc17")  # plot the value for sc17
scaler.inverse_transform(series_cycle16).plot(label="sc16")  # plot the value for sc16
scaler.inverse_transform(series_cycle15).plot(label="sc15")  # plot the value for sc15
plt.legend() 
Out[10]:
<matplotlib.legend.Legend at 0x2d3a71fad10>
In [11]:
def eval_model(model,before_series,series,title):
    pred_series = model.predict(
        n=len(series),
        series = before_series
    )   
    
    series = scaler.inverse_transform(series)
    pred_series = scaler.inverse_transform(pred_series)
    print("MAE:",mae(pred_series, series))
    print("RMSE:",rmse(series,pred_series))
    print("prediction length:",len(series))
    print(series.time_index[0], pred_series.time_index[0])
    
    plt.figure(figsize=(5, 3))
    series.plot(label="Actual")
    pred_series.plot(label="Predict")
    plt.title(title)
    plt.legend()
    plt.show() 
In [12]:
eval_model(best_model, before_series_cycle15,series_cycle15,"Solar Cycle15")
MAE: 27.084196
RMSE: 35.17347
prediction length: 120
1913-08-01 00:00:00 1913-08-01 00:00:00
In [13]:
eval_model(best_model, before_series_cycle16,series_cycle16,"Solar Cycle16")
MAE: 43.896187
RMSE: 51.111748
prediction length: 121
1923-08-01 00:00:00 1923-08-01 00:00:00
In [14]:
eval_model(best_model, before_series_cycle17,series_cycle17,"Solar Cycle17")
MAE: 31.9774
RMSE: 38.426548
prediction length: 125
1933-09-01 00:00:00 1933-09-01 00:00:00
In [15]:
eval_model(best_model, before_series_cycle18,series_cycle18,"Solar Cycle18")
MAE: 67.76187
RMSE: 78.525894
prediction length: 122
1944-02-01 00:00:00 1944-02-01 00:00:00
In [16]:
eval_model(best_model, before_series_cycle19,series_cycle19,"Solar Cycle19")
MAE: 68.43466
RMSE: 90.30483
prediction length: 126
1954-04-01 00:00:00 1954-04-01 00:00:00
In [17]:
eval_model(best_model, before_series_cycle20,series_cycle20,"Solar Cycle20")
MAE: 41.625347
RMSE: 49.949512
prediction length: 137
1964-10-01 00:00:00 1964-10-01 00:00:00
In [18]:
eval_model(best_model, before_series_cycle21,series_cycle21,"Solar Cycle21")
MAE: 62.162727
RMSE: 72.85285
prediction length: 126
1976-03-01 00:00:00 1976-03-01 00:00:00
In [19]:
eval_model(best_model, before_series_cycle22,series_cycle22,"Solar Cycle22")
MAE: 58.236973
RMSE: 65.86109
prediction length: 119
1986-09-01 00:00:00 1986-09-01 00:00:00
In [20]:
eval_model(best_model, before_series_cycle23,series_cycle23,"Solar Cycle23")
MAE: 27.995817
RMSE: 34.597046
prediction length: 148
1996-08-01 00:00:00 1996-08-01 00:00:00
In [21]:
eval_model(best_model, before_series_cycle24,series_cycle24,"Solar Cycle24")
MAE: 39.53812
RMSE: 49.80267
prediction length: 132
2008-12-01 00:00:00 2008-12-01 00:00:00
In [22]:
eval_model(best_model, before_series_cycle25,series_cycle25,"Solar Cycle25")
MAE: 12.2728195
RMSE: 15.573511
prediction length: 48
2019-12-01 00:00:00 2019-12-01 00:00:00

fitting result for solar cycle from 15-25¶

In [23]:
def backtest(testing_model,cycle, series_cycle,title):
    backtest = testing_model.historical_forecasts(
        series=series_scaled,  # input the entire data sequence
        start=pd.Timestamp(cycle),  # start time 
        forecast_horizon=1,  # forecast length
        retrain=False,  # retrain the model
        verbose=True,  # display detailed running information
    )
    backtest = backtest[0:len(series_cycle)]
    
    series_cycle = scaler.inverse_transform(series_cycle)
    backtest = scaler.inverse_transform(backtest)
    print(len(backtest),len(series_cycle))
    
    plt.figure(figsize=(5, 3))
    series_cycle.plot(label="Actual")  
    backtest.plot(label="Fitting")  
    plt.title(title)
    plt.legend()  
    
    print("MAE:", mae(backtest,series_cycle))
    print("RMSE:", rmse(backtest,series_cycle))
    print(series_cycle.time_index[0],backtest.time_index[0])
    
    return backtest
In [24]:
pred_series_cycle15 = backtest(best_model,cycle15,series_cycle15,"Solar Cycle15") 
120 120
MAE: 1.94609
RMSE: 2.632276
1913-08-01 00:00:00 1913-08-01 00:00:00
In [25]:
pred_series_cycle16 = backtest(best_model,cycle16,series_cycle16,"Solar Cycle16") 
121 121
MAE: 1.4749224
RMSE: 1.9877961
1923-08-01 00:00:00 1923-08-01 00:00:00
In [26]:
pred_series_cycle17 = backtest(best_model,cycle17,series_cycle17,"Solar Cycle17") 
125 125
MAE: 1.8153664
RMSE: 2.3345559
1933-09-01 00:00:00 1933-09-01 00:00:00
In [27]:
pred_series_cycle18 = backtest(best_model,cycle18,series_cycle18,"Solar Cycle18") 
122 122
MAE: 2.0513554
RMSE: 2.6425087
1944-02-01 00:00:00 1944-02-01 00:00:00
In [28]:
pred_series_cycle19 = backtest(best_model,cycle19,series_cycle19,"Solar Cycle19") 
126 126
MAE: 3.2370994
RMSE: 4.801907
1954-04-01 00:00:00 1954-04-01 00:00:00
In [29]:
pred_series_cycle20 = backtest(best_model,cycle20,series_cycle20,"Solar Cycle20")  
137 137
MAE: 1.4238318
RMSE: 1.7601632
1964-10-01 00:00:00 1964-10-01 00:00:00
In [30]:
pred_series_cycle21 = backtest(best_model,cycle21,series_cycle21,"Solar Cycle21") 
126 126
MAE: 1.8858839
RMSE: 2.487002
1976-03-01 00:00:00 1976-03-01 00:00:00
In [31]:
pred_series_cycle22 = backtest(best_model,cycle22, series_cycle22,"Solar Cycle22")
119 119
MAE: 1.7093934
RMSE: 2.2763624
1986-09-01 00:00:00 1986-09-01 00:00:00
In [32]:
pred_series_cycle23 = backtest(best_model,cycle23,series_cycle23,"Solar Cycle23") 
148 148
MAE: 1.4826883
RMSE: 1.9834237
1996-08-01 00:00:00 1996-08-01 00:00:00
In [33]:
pred_series_cycle24 = backtest(best_model,cycle24,series_cycle24,"Solar Cycle24")
132 132
MAE: 1.0973927
RMSE: 1.6072083
2008-12-01 00:00:00 2008-12-01 00:00:00
In [34]:
pred_series_cycle25 = backtest(best_model,cycle25, series_cycle25,"Solar Cycle25")
48 48
MAE: 1.0416851
RMSE: 1.2918218
2019-12-01 00:00:00 2019-12-01 00:00:00

Predict the solar cycle 25¶

In [35]:
def predict(model, n, series):
    # Use the model to make predictions for n time steps
    pred_series = model.predict(n=n, series= series) #Start from the last training set
    
    pred_series = scaler.inverse_transform(pred_series)
    series = scaler.inverse_transform(series)
    
    plt.figure(figsize=(5, 3))
    series.plot(label="Actual")
    pred_series.plot(label="Forecast Solar Cycle25")
    plt.legend()
    plt.show()
    
    return pred_series
    
pred_series_cycle = predict(best_model, 90, val_scaled)
print(pred_series_cycle.time_index[0])
2023-12-01 00:00:00
In [36]:
darts_to_pd = TimeSeries.pd_dataframe(pred_series_cycle) 

max_date = darts_to_pd['Sunspots'].idxmax() #date
print("The date when the peak solar activity occurs:",max_date)
max_value = darts_to_pd.loc[max_date]
print("The peak value :",max_value['Sunspots'])

min_date = darts_to_pd['Sunspots'].idxmin() #date
print("The date when solar cycle 25 ends:",min_date)
min_value = darts_to_pd.loc[min_date]

right_series_cycle25, series_cycle26 = pred_series_cycle.split_after(pd.Timestamp("20301101")) 
The date when the peak solar activity occurs: 2024-10-01 00:00:00
The peak value : 135.33604
The date when solar cycle 25 ends: 2030-11-01 00:00:00
In [37]:
 plt.figure(figsize=(5, 3))
scaler.inverse_transform(series_cycle25).plot(label="Actual")
right_series_cycle25.plot(label="Predict Solar Cycle25")
plt.legend(loc='center')
plt.show()
In [ ]:
 
In [ ]: