import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import shutil
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm_notebook as tqdm
from tensorboardX import SummaryWriter
import matplotlib.pyplot as plt
from darts import TimeSeries
from darts.dataprocessing.transformers import Scaler
from darts.models import TransformerModel, ExponentialSmoothing
from darts.metrics import mape
from darts.utils.statistics import check_seasonality, plot_acf
from darts.datasets import AirPassengersDataset, SunspotsDataset
from darts.models import TCNModel, RNNModel
from darts.utils.callbacks import TFMProgressBar
import warnings
warnings.filterwarnings("ignore")
import logging
logging.disable(logging.CRITICAL)
def generate_torch_kwargs():
# run torch models on CPU, and disable progress bars for all model stages except training.
return {
"pl_trainer_kwargs": {
"accelerator": "cpu",
"callbacks": [TFMProgressBar(enable_train_bar_only=True)],
}
}
from darts.metrics import mape,rmse,mae
series = SunspotsDataset().load().astype(np.float32)
train, val = series.split_after(0.75)
train.plot(label = "Training Data")
val.plot(label='Validation Data')
plt.show()
scaler = Scaler()
train_scaled = scaler.fit_transform(train) #training data
val_scaled = scaler.transform(val) #test data
series_scaled = scaler.transform(series) #all data
print("the 'monthly sunspots' dataset has {} data points".format(len(series_scaled)))
the 'monthly sunspots' dataset has 3293 data points
input_chunk_length = 20
model_name = "TCN_sun_one"
model = TCNModel(
input_chunk_length=input_chunk_length,
output_chunk_length=1,
n_epochs=30,
dropout=0,
dilation_base=2,
weight_norm=True,
kernel_size=3,
num_filters=6,
nr_epochs_val_period=1,
random_state=0,
save_checkpoints=True,
model_name=model_name,
force_reset=True,
**generate_torch_kwargs()
)
model.fit(
series=train_scaled,
val_series=val_scaled,
verbose=True
)
Training: 0it [00:00, ?it/s]
TCNModel(output_chunk_shift=0, kernel_size=3, num_filters=6, num_layers=None, dilation_base=2, weight_norm=True, dropout=0, input_chunk_length=20, output_chunk_length=1, n_epochs=30, nr_epochs_val_period=1, random_state=0, save_checkpoints=True, model_name=TCN_sun_one, force_reset=True, pl_trainer_kwargs={'accelerator': 'cpu', 'callbacks': [<darts.utils.callbacks.TFMProgressBar object at 0x000002D3FD01DED0>]})
best_model = TCNModel.load_from_checkpoint(
model_name=model_name, best=True
)
backtest = best_model.historical_forecasts(
series=series_scaled, # input the entire data sequence
forecast_horizon=120, # forcast length
retrain=False, # retrain the model
verbose=True, # display the running information
)
plt.figure(figsize=(5, 3))
scaler.inverse_transform(series_scaled).plot(label="actual") # plot the actual value
scaler.inverse_transform(backtest).plot(label="mape:{:.2f}".format(mape(backtest, series_scaled))) # plot the predict value
plt.legend()
print("RMSE:", rmse(scaler.inverse_transform(series_scaled),scaler.inverse_transform(backtest)))
print("MAE:", mae(scaler.inverse_transform(series_scaled),scaler.inverse_transform(backtest)))
RMSE: 58.841866 MAE: 42.917446
cycle15 = "19130801"
cycle16 = "19230801"
cycle17 = "19330901"
cycle18 = "19440201"
cycle19 = "19540401"
cycle20 = "19641001" # "19641001" ,
cycle21 = "19760301" # "19760301",
cycle22 = "19860901" # "19860901",
cycle23 = "19960801" #"19960801"
cycle24 = "20081201" #"20081201"
cycle25 = "20191201" #"20191201"
before_series_cycle25, series_cycle25 = series_scaled.split_after(pd.Timestamp("20191101"))
before_series_cycle24, series_cycle24 = before_series_cycle25.split_after(pd.Timestamp("20081101"))
before_series_cycle23, series_cycle23 = before_series_cycle24.split_after(pd.Timestamp("19960701"))
before_series_cycle22, series_cycle22 = before_series_cycle23.split_after(pd.Timestamp("19860801"))
before_series_cycle21, series_cycle21 = before_series_cycle22.split_after(pd.Timestamp("19760201"))
before_series_cycle20, series_cycle20 = before_series_cycle21.split_after(pd.Timestamp("19640901"))
before_series_cycle19, series_cycle19 = before_series_cycle20.split_after(pd.Timestamp("19540301"))
before_series_cycle18, series_cycle18 = before_series_cycle19.split_after(pd.Timestamp("19440101"))
before_series_cycle17, series_cycle17 = before_series_cycle18.split_after(pd.Timestamp("19330801"))
before_series_cycle16, series_cycle16 = before_series_cycle17.split_after(pd.Timestamp("19230701"))
before_series_cycle15, series_cycle15 = before_series_cycle16.split_after(pd.Timestamp("19130701"))
plt.figure(figsize=(5, 3))
scaler.inverse_transform(series_scaled).plot(label="actual") # plot the value for entire datas
scaler.inverse_transform(series_cycle25).plot(label="sc25") # plot the value for sc25
scaler.inverse_transform(series_cycle24).plot(label="sc24") # plot the value for sc24
scaler.inverse_transform(series_cycle23).plot(label="sc23") # plot the value for sc23
scaler.inverse_transform(series_cycle22).plot(label="sc22") # plot the value for sc22
scaler.inverse_transform(series_cycle21).plot(label="sc21") # plot the value for sc21
scaler.inverse_transform(series_cycle20).plot(label="sc20") # plot the value for sc20
scaler.inverse_transform(series_cycle19).plot(label="sc19") # plot the value for sc19
scaler.inverse_transform(series_cycle18).plot(label="sc18") # plot the value for sc18
scaler.inverse_transform(series_cycle17).plot(label="sc17") # plot the value for sc17
scaler.inverse_transform(series_cycle16).plot(label="sc16") # plot the value for sc16
scaler.inverse_transform(series_cycle15).plot(label="sc15") # plot the value for sc15
plt.legend()
<matplotlib.legend.Legend at 0x2d3a71fad10>
def eval_model(model,before_series,series,title):
pred_series = model.predict(
n=len(series),
series = before_series
)
series = scaler.inverse_transform(series)
pred_series = scaler.inverse_transform(pred_series)
print("MAE:",mae(pred_series, series))
print("RMSE:",rmse(series,pred_series))
print("prediction length:",len(series))
print(series.time_index[0], pred_series.time_index[0])
plt.figure(figsize=(5, 3))
series.plot(label="Actual")
pred_series.plot(label="Predict")
plt.title(title)
plt.legend()
plt.show()
eval_model(best_model, before_series_cycle15,series_cycle15,"Solar Cycle15")
MAE: 27.084196 RMSE: 35.17347 prediction length: 120 1913-08-01 00:00:00 1913-08-01 00:00:00
eval_model(best_model, before_series_cycle16,series_cycle16,"Solar Cycle16")
MAE: 43.896187 RMSE: 51.111748 prediction length: 121 1923-08-01 00:00:00 1923-08-01 00:00:00
eval_model(best_model, before_series_cycle17,series_cycle17,"Solar Cycle17")
MAE: 31.9774 RMSE: 38.426548 prediction length: 125 1933-09-01 00:00:00 1933-09-01 00:00:00
eval_model(best_model, before_series_cycle18,series_cycle18,"Solar Cycle18")
MAE: 67.76187 RMSE: 78.525894 prediction length: 122 1944-02-01 00:00:00 1944-02-01 00:00:00
eval_model(best_model, before_series_cycle19,series_cycle19,"Solar Cycle19")
MAE: 68.43466 RMSE: 90.30483 prediction length: 126 1954-04-01 00:00:00 1954-04-01 00:00:00
eval_model(best_model, before_series_cycle20,series_cycle20,"Solar Cycle20")
MAE: 41.625347 RMSE: 49.949512 prediction length: 137 1964-10-01 00:00:00 1964-10-01 00:00:00
eval_model(best_model, before_series_cycle21,series_cycle21,"Solar Cycle21")
MAE: 62.162727 RMSE: 72.85285 prediction length: 126 1976-03-01 00:00:00 1976-03-01 00:00:00
eval_model(best_model, before_series_cycle22,series_cycle22,"Solar Cycle22")
MAE: 58.236973 RMSE: 65.86109 prediction length: 119 1986-09-01 00:00:00 1986-09-01 00:00:00
eval_model(best_model, before_series_cycle23,series_cycle23,"Solar Cycle23")
MAE: 27.995817 RMSE: 34.597046 prediction length: 148 1996-08-01 00:00:00 1996-08-01 00:00:00
eval_model(best_model, before_series_cycle24,series_cycle24,"Solar Cycle24")
MAE: 39.53812 RMSE: 49.80267 prediction length: 132 2008-12-01 00:00:00 2008-12-01 00:00:00
eval_model(best_model, before_series_cycle25,series_cycle25,"Solar Cycle25")
MAE: 12.2728195 RMSE: 15.573511 prediction length: 48 2019-12-01 00:00:00 2019-12-01 00:00:00
def backtest(testing_model,cycle, series_cycle,title):
backtest = testing_model.historical_forecasts(
series=series_scaled, # input the entire data sequence
start=pd.Timestamp(cycle), # start time
forecast_horizon=1, # forecast length
retrain=False, # retrain the model
verbose=True, # display detailed running information
)
backtest = backtest[0:len(series_cycle)]
series_cycle = scaler.inverse_transform(series_cycle)
backtest = scaler.inverse_transform(backtest)
print(len(backtest),len(series_cycle))
plt.figure(figsize=(5, 3))
series_cycle.plot(label="Actual")
backtest.plot(label="Fitting")
plt.title(title)
plt.legend()
print("MAE:", mae(backtest,series_cycle))
print("RMSE:", rmse(backtest,series_cycle))
print(series_cycle.time_index[0],backtest.time_index[0])
return backtest
pred_series_cycle15 = backtest(best_model,cycle15,series_cycle15,"Solar Cycle15")
120 120 MAE: 1.94609 RMSE: 2.632276 1913-08-01 00:00:00 1913-08-01 00:00:00
pred_series_cycle16 = backtest(best_model,cycle16,series_cycle16,"Solar Cycle16")
121 121 MAE: 1.4749224 RMSE: 1.9877961 1923-08-01 00:00:00 1923-08-01 00:00:00
pred_series_cycle17 = backtest(best_model,cycle17,series_cycle17,"Solar Cycle17")
125 125 MAE: 1.8153664 RMSE: 2.3345559 1933-09-01 00:00:00 1933-09-01 00:00:00
pred_series_cycle18 = backtest(best_model,cycle18,series_cycle18,"Solar Cycle18")
122 122 MAE: 2.0513554 RMSE: 2.6425087 1944-02-01 00:00:00 1944-02-01 00:00:00
pred_series_cycle19 = backtest(best_model,cycle19,series_cycle19,"Solar Cycle19")
126 126 MAE: 3.2370994 RMSE: 4.801907 1954-04-01 00:00:00 1954-04-01 00:00:00
pred_series_cycle20 = backtest(best_model,cycle20,series_cycle20,"Solar Cycle20")
137 137 MAE: 1.4238318 RMSE: 1.7601632 1964-10-01 00:00:00 1964-10-01 00:00:00
pred_series_cycle21 = backtest(best_model,cycle21,series_cycle21,"Solar Cycle21")
126 126 MAE: 1.8858839 RMSE: 2.487002 1976-03-01 00:00:00 1976-03-01 00:00:00
pred_series_cycle22 = backtest(best_model,cycle22, series_cycle22,"Solar Cycle22")
119 119 MAE: 1.7093934 RMSE: 2.2763624 1986-09-01 00:00:00 1986-09-01 00:00:00
pred_series_cycle23 = backtest(best_model,cycle23,series_cycle23,"Solar Cycle23")
148 148 MAE: 1.4826883 RMSE: 1.9834237 1996-08-01 00:00:00 1996-08-01 00:00:00
pred_series_cycle24 = backtest(best_model,cycle24,series_cycle24,"Solar Cycle24")
132 132 MAE: 1.0973927 RMSE: 1.6072083 2008-12-01 00:00:00 2008-12-01 00:00:00
pred_series_cycle25 = backtest(best_model,cycle25, series_cycle25,"Solar Cycle25")
48 48 MAE: 1.0416851 RMSE: 1.2918218 2019-12-01 00:00:00 2019-12-01 00:00:00
def predict(model, n, series):
# Use the model to make predictions for n time steps
pred_series = model.predict(n=n, series= series) #Start from the last training set
pred_series = scaler.inverse_transform(pred_series)
series = scaler.inverse_transform(series)
plt.figure(figsize=(5, 3))
series.plot(label="Actual")
pred_series.plot(label="Forecast Solar Cycle25")
plt.legend()
plt.show()
return pred_series
pred_series_cycle = predict(best_model, 90, val_scaled)
print(pred_series_cycle.time_index[0])
2023-12-01 00:00:00
darts_to_pd = TimeSeries.pd_dataframe(pred_series_cycle)
max_date = darts_to_pd['Sunspots'].idxmax() #date
print("The date when the peak solar activity occurs:",max_date)
max_value = darts_to_pd.loc[max_date]
print("The peak value :",max_value['Sunspots'])
min_date = darts_to_pd['Sunspots'].idxmin() #date
print("The date when solar cycle 25 ends:",min_date)
min_value = darts_to_pd.loc[min_date]
right_series_cycle25, series_cycle26 = pred_series_cycle.split_after(pd.Timestamp("20301101"))
The date when the peak solar activity occurs: 2024-10-01 00:00:00 The peak value : 135.33604 The date when solar cycle 25 ends: 2030-11-01 00:00:00
plt.figure(figsize=(5, 3))
scaler.inverse_transform(series_cycle25).plot(label="Actual")
right_series_cycle25.plot(label="Predict Solar Cycle25")
plt.legend(loc='center')
plt.show()