import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import shutil
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm_notebook as tqdm
from tensorboardX import SummaryWriter
import matplotlib.pyplot as plt
from darts import TimeSeries
from darts.dataprocessing.transformers import Scaler
from darts.models import TransformerModel, ExponentialSmoothing
from darts.metrics import mape
from darts.utils.statistics import check_seasonality, plot_acf
from darts.datasets import AirPassengersDataset, SunspotsDataset
from darts.models import TCNModel, RNNModel
from darts.utils.callbacks import TFMProgressBar
import warnings
warnings.filterwarnings("ignore")
import logging
logging.disable(logging.CRITICAL)
def generate_torch_kwargs():
# run torch models on CPU, and disable progress bars for all model stages except training.
return {
"pl_trainer_kwargs": {
"accelerator": "cpu",
"callbacks": [TFMProgressBar(enable_train_bar_only=True)],
}
}
from darts.metrics import mape,rmse,mae
series = SunspotsDataset().load().astype(np.float32)
train, val = series.split_after(0.75)
plt.figure(figsize=(5, 3))
train.plot(label = "Training Data")
val.plot(label='Validation Data')
plt.ylabel('Sunspot Number')
plt.show()
scaler = Scaler()
train_scaled = scaler.fit_transform(train) #training data
val_scaled = scaler.transform(val) #test data
series_scaled = scaler.transform(series) #all data
print("the 'monthly sunspots' dataset has {} data points".format(len(series_scaled)))
len(series_scaled),len(train_scaled),len(val_scaled)
the 'monthly sunspots' dataset has 3293 data points
(3293, 2470, 823)
input_chunk_length = 1200
output_chunk_length = 120
model_name = "TCN_sun_multi"
model = TCNModel(
input_chunk_length=input_chunk_length,
output_chunk_length=output_chunk_length,
# n_epochs=200,
n_epochs=200,
dropout=0.4,
dilation_base=2,
weight_norm=True,
kernel_size=3,
num_filters=6,
nr_epochs_val_period=1,
random_state=0,
save_checkpoints=True,
model_name=model_name,
force_reset=True,
**generate_torch_kwargs()
)
model.fit(
series = series_scaled,
verbose=True
)
Training: 0it [00:00, ?it/s]
TCNModel(output_chunk_shift=0, kernel_size=3, num_filters=6, num_layers=None, dilation_base=2, weight_norm=True, dropout=0.4, input_chunk_length=1200, output_chunk_length=120, n_epochs=200, nr_epochs_val_period=1, random_state=0, save_checkpoints=True, model_name=TCN_sun_multi, force_reset=True, pl_trainer_kwargs={'accelerator': 'cpu', 'callbacks': [<darts.utils.callbacks.TFMProgressBar object at 0x00000217E818CAD0>]})
# best_model = TCNModel.load_from_checkpoint(
# model_name=model_name, best=True
# )
best_model = model
backtest = best_model.historical_forecasts(
series=series_scaled, # input the entire data sequence
forecast_horizon=output_chunk_length, # forecast length
retrain=False, # retrain the model
verbose=True, # display detailed running information
)
plt.figure(figsize=(5, 3))
scaler.inverse_transform(series_scaled).plot(label="actual") # plot the acual value
scaler.inverse_transform(backtest).plot(label="predict") # plot the predict value
plt.legend()
print("MAE:", mae(scaler.inverse_transform(backtest), scaler.inverse_transform(series_scaled)))
print("RMSE:", rmse(scaler.inverse_transform(backtest), scaler.inverse_transform(series_scaled)))
MAE: 18.918198 RMSE: 25.549845
cycle15 = "19130801"
cycle16 = "19230801"
cycle17 = "19330901"
cycle18 = "19440201"
cycle19 = "19540401"
cycle20 = "19641001" # "19641001" ,
cycle21 = "19760301" # "19760301",
cycle22 = "19860901" # "19860901",
cycle23 = "19960801" #"19960801"
cycle24 = "20081201" #"20081201"
cycle25 = "20191201" #"20191201"
before_series_cycle25, series_cycle25 = series_scaled.split_after(pd.Timestamp("20191101"))
before_series_cycle24, series_cycle24 = before_series_cycle25.split_after(pd.Timestamp("20081101"))
before_series_cycle23, series_cycle23 = before_series_cycle24.split_after(pd.Timestamp("19960701"))
before_series_cycle22, series_cycle22 = before_series_cycle23.split_after(pd.Timestamp("19860801"))
before_series_cycle21, series_cycle21 = before_series_cycle22.split_after(pd.Timestamp("19760201"))
before_series_cycle20, series_cycle20 = before_series_cycle21.split_after(pd.Timestamp("19640901"))
before_series_cycle19, series_cycle19 = before_series_cycle20.split_after(pd.Timestamp("19540301"))
before_series_cycle18, series_cycle18 = before_series_cycle19.split_after(pd.Timestamp("19440101"))
before_series_cycle17, series_cycle17 = before_series_cycle18.split_after(pd.Timestamp("19330801"))
before_series_cycle16, series_cycle16 = before_series_cycle17.split_after(pd.Timestamp("19230701"))
before_series_cycle15, series_cycle15 = before_series_cycle16.split_after(pd.Timestamp("19130701"))
plt.figure(figsize=(5, 3))
scaler.inverse_transform(series_scaled).plot(label="actual") # plot the value for entire datas
scaler.inverse_transform(series_cycle25).plot(label="series_cycle25") # plot the value for sc25
scaler.inverse_transform(series_cycle24).plot(label="series_cycle24") # plot the value for sc24
scaler.inverse_transform(series_cycle23).plot(label="series_cycle23") # plot the value for sc23
scaler.inverse_transform(series_cycle22).plot(label="series_cycle22") # plot the value for sc22
scaler.inverse_transform(series_cycle21).plot(label="series_cycle21")
scaler.inverse_transform(series_cycle20).plot(label="series_cycle20")
scaler.inverse_transform(series_cycle19).plot(label="sc19")
scaler.inverse_transform(series_cycle18).plot(label="sc18")
scaler.inverse_transform(series_cycle17).plot(label="sc17")
scaler.inverse_transform(series_cycle16).plot(label="sc16")
scaler.inverse_transform(series_cycle15).plot(label="sc15")
plt.legend()
<matplotlib.legend.Legend at 0x21795dbf890>
def eval_model(model,before_series,series,title):
pred_series = model.predict(
n=len(series),
series = before_series
)
series = scaler.inverse_transform(series)
pred_series = scaler.inverse_transform(pred_series)
plt.figure(figsize=(5, 3))
print("MAE: ",mae(pred_series, series))
print("RMSE: ",rmse(pred_series, series))
print(len(pred_series),len(series))
series.plot(label="Actual")
pred_series.plot(label="Predict")
plt.title(title)
plt.legend()
plt.show()
print(series.time_index[0], pred_series.time_index[0])
eval_model(best_model, before_series_cycle15,series_cycle15,"Solar Cycle15")
MAE: 18.684488 RMSE: 26.501322 120 120
1913-08-01 00:00:00 1913-08-01 00:00:00
eval_model(best_model, before_series_cycle16,series_cycle16,"Solar Cycle16")
MAE: 18.600054 RMSE: 19.849741 121 121
1923-08-01 00:00:00 1923-08-01 00:00:00
eval_model(best_model, before_series_cycle17,series_cycle17,"Solar Cycle17")
MAE: 14.887585 RMSE: 18.96901 125 125
1933-09-01 00:00:00 1933-09-01 00:00:00
eval_model(best_model, before_series_cycle18,series_cycle18,"Solar Cycle18")
MAE: 14.8080435 RMSE: 16.615202 122 122
1944-02-01 00:00:00 1944-02-01 00:00:00
eval_model(best_model, before_series_cycle19,series_cycle19,"Solar Cycle19")
MAE: 28.659431 RMSE: 36.10116 126 126
1954-04-01 00:00:00 1954-04-01 00:00:00
eval_model(best_model, before_series_cycle20,series_cycle20,"Solar Cycle20")
MAE: 8.651718 RMSE: 11.246224 137 137
1964-10-01 00:00:00 1964-10-01 00:00:00
eval_model(best_model, before_series_cycle21,series_cycle21,"Solar Cycle21")
MAE: 29.228825 RMSE: 35.828457 126 126
1976-03-01 00:00:00 1976-03-01 00:00:00
eval_model(best_model, before_series_cycle22,series_cycle22,"Solar Cycle22")
MAE: 25.150364 RMSE: 28.17766 119 119
1986-09-01 00:00:00 1986-09-01 00:00:00
eval_model(best_model, before_series_cycle23,series_cycle23,"Solar Cycle23")
MAE: 11.6845455 RMSE: 17.102806 148 148
1996-08-01 00:00:00 1996-08-01 00:00:00
eval_model(best_model, before_series_cycle24,series_cycle24,"Solar Cycle24")
MAE: 28.078577 RMSE: 35.904285 132 132
2008-12-01 00:00:00 2008-12-01 00:00:00
eval_model(best_model, before_series_cycle25,series_cycle25,"Solar Cycle25")
MAE: 8.285022 RMSE: 10.659435 48 48
2019-12-01 00:00:00 2019-12-01 00:00:00
def backtest(testing_model,cycle, series_cycle,title):
backtest = testing_model.historical_forecasts(
series=series_scaled, # input the entire data sequence
start=pd.Timestamp(cycle), # start time
forecast_horizon=output_chunk_length, # forecast length
retrain=False, # retrain the model
verbose=True, # display detailed running information
)
backtest = backtest[0:len(series_cycle)]
series_cycle = scaler.inverse_transform(series_cycle)
backtest = scaler.inverse_transform(backtest)
print("MAE:", mae(backtest,series_cycle))
print("RMSE:", rmse(backtest,series_cycle))
plt.figure(figsize=(5, 3))
series_cycle.plot(label="Actual") # plot the actual value
backtest.plot(label="Fitting") # plot the predict value
plt.title(title)
plt.legend()
print(len(backtest),len(series_cycle))
print(backtest.time_index[0],series_cycle.time_index[0])
return backtest
pred_series_cycle15 = backtest(best_model,"19030901",series_cycle15,"Solar Cycle15")
MAE: 18.662493 RMSE: 26.491293 120 120 1913-08-01 00:00:00 1913-08-01 00:00:00
pred_series_cycle16 = backtest(best_model,"19130901",series_cycle16,"Solar Cycle16")
MAE: 18.621532 RMSE: 19.866823 121 121 1923-08-01 00:00:00 1923-08-01 00:00:00
pred_series_cycle17 = backtest(best_model,"19231001",series_cycle17,"Solar Cycle17")
MAE: 14.719825 RMSE: 18.904537 125 125 1933-09-01 00:00:00 1933-09-01 00:00:00
pred_series_cycle18 = backtest(best_model,"19340301",series_cycle18,"Solar Cycle18")
MAE: 14.82986 RMSE: 16.614609 122 122 1944-02-01 00:00:00 1944-02-01 00:00:00
pred_series_cycle19 = backtest(best_model,"19440501",series_cycle19,"Solar Cycle19")
MAE: 28.354893 RMSE: 36.033733 126 126 1954-04-01 00:00:00 1954-04-01 00:00:00
pred_series_cycle20 = backtest(best_model,"19541101",series_cycle20,"Solar Cycle20")
MAE: 8.622017 RMSE: 11.2970915 137 137 1964-10-01 00:00:00 1964-10-01 00:00:00
pred_series_cycle21 = backtest(best_model,"19660401",series_cycle21,"Solar Cycle21")
MAE: 28.949572 RMSE: 35.752636 126 126 1976-03-01 00:00:00 1976-03-01 00:00:00
pred_series_cycle22 = backtest(best_model,"19761001", series_cycle22,"Solar Cycle22")
MAE: 25.193789 RMSE: 28.231571 119 119 1986-09-01 00:00:00 1986-09-01 00:00:00
pred_series_cycle23 = backtest(best_model,"19860901",series_cycle23,"Solar Cycle23")
MAE: 12.342292 RMSE: 18.46485 148 148 1996-08-01 00:00:00 1996-08-01 00:00:00
pred_series_cycle24 = backtest(best_model,"19990101",series_cycle24,"Solar Cycle24")
MAE: 25.132095 RMSE: 34.449017 132 132 2008-12-01 00:00:00 2008-12-01 00:00:00
pred_series_cycle25 = backtest(best_model,"20100101", series_cycle25,"Solar Cycle25")
MAE: 8.385198 RMSE: 10.805431 48 48 2019-12-01 00:00:00 2019-12-01 00:00:00
def predict(model, n, series,val_scaled):
pred_series = model.predict(n=n, series= series)
pred_series = scaler.inverse_transform(pred_series)
val_scaled = scaler.inverse_transform(val_scaled)
plt.figure(figsize=(5, 3))
val_scaled.plot(label="Actual")
pred_series.plot(label="Forecast Solar Cycle25")
plt.legend()
plt.show()
return pred_series
pred_series_cycle = predict(best_model, 90, series_scaled,val_scaled)
pred_series_cycle.time_index[0]
Timestamp('2023-12-01 00:00:00')
darts_to_pd = TimeSeries.pd_dataframe(pred_series_cycle)
max_date = darts_to_pd['Sunspots'].idxmax() #date
print("The date when the peak solar activity occurs:",max_date)
max_value = darts_to_pd.loc[max_date]
print("The peak value :",max_value['Sunspots'])
min_date = darts_to_pd['Sunspots'].idxmin() #date
print("The date when solar cycle 25 ends:",min_date)
min_value = darts_to_pd.loc[min_date]
The date when the peak solar activity occurs: 2024-04-01 00:00:00 The peak value : 120.59972 The date when solar cycle 25 ends: 2030-04-01 00:00:00
plt.figure(figsize=(5, 3))
scaler.inverse_transform(series_cycle25).plot()
pred_series_cycle.plot()
<Axes: xlabel='Month'>