This file can be used to reproduce figures 7-8 from the paper (and equivalent figures). For details on the data used, please see the main reproducibility document. 

In [None]:
import numpy as np
import pickle
import pandas as pd
from functools import partial
import glob
import seaborn as sbs
import matplotlib.pyplot as plt

from scipy.stats import kendalltau, rankdata

# Default font size (24) applied to all matplotlib text via plt.rc below.
font = {'size'   : 24}

plt.rc('font', **font)

#Font-requirement for GECCO
# fonttype 42 makes the PDF/PS backends embed fonts as TrueType (Type 42)
# instead of matplotlib's default Type 3.
import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Algorithm identifiers; each ordered pair (a1, a2) of these is part of the
# result file names read below (F{fid}_{a1}_{a2}.csv).
algorithms = ['DifferentialEvolution', 'ConfiguredPSO', 'modcma']

In [None]:
# Per-switchpoint regression quality for the 'Real' result files.
# For every file (type_ x function id x algorithm pair) compute the MSE and
# R^2 between the 'Real' and 'Predicted' columns, one record per distinct
# value of 'sp'.
records = []
for type_ in ['short', 'medium', 'long']:
    for fid in range(1,25):
        for a1 in algorithms:
            for a2 in algorithms:
                dt = pd.read_csv(f"RF_Results_norm/{type_}/Real/F{fid}_{a1}_{a2}.csv", index_col=0)
                # groupby splits the frame once (keys in ascending order, as
                # np.unique would give) instead of re-masking the whole frame
                # for every switchpoint.
                for sp, dt_one_sp in dt.groupby('sp'):
                    real = dt_one_sp['Real']
                    predicted = dt_one_sp['Predicted']
                    records.append([type_, fid, a1, a2, sp,
                                    mean_squared_error(real, predicted),
                                    r2_score(real, predicted)])

In [None]:
# Turn the collected records into a frame: one row per (type_, fid, a1, a2, sp)
# with its 'mse' and 'r2'.
dt_models = pd.DataFrame.from_records(records, columns=['type_', 'fid', 'a1', 'a2', 'sp', 'mse', 'r2'])

In [None]:
# Persist the regression metrics; this file is read back later in the notebook.
dt_models.to_csv("dt_model_acc_norm_v2.csv")

In [None]:
import seaborn as sbs

In [None]:
# One scatter plot per (function id, a1, a2): MSE against switchpoint,
# coloured by type_. Each figure is written to MSE_Figures_norm/ and closed.
for fid in range(1,25):
    for a1 in algorithms:
        for a2 in algorithms:
            dt_sub = dt_models[(dt_models['fid'] == fid) & (dt_models['a1'] == a1) & (dt_models['a2'] == a2)]
            plt.figure(figsize=(16,9))
            sbs.scatterplot(data = dt_sub, x = 'sp', y='mse', hue='type_')

            plt.xlabel("Switchpoint")
            plt.ylabel("Mean Square Error")
            plt.tight_layout()

            plt.savefig(f"MSE_Figures_norm/F{fid}_{a1}_{a2}.pdf")
            # Release the figure once saved: this loop creates one figure per
            # (fid, a1, a2) and pyplot keeps every unclosed figure alive.
            plt.close()

In [None]:
# Violin plot of MSE per function id for a single algorithm pair, split by type_.
# NOTE(review): a1 and a2 are not assigned in this cell; they hold whatever an
# earlier cell left behind (the last pair of the preceding loop if cells are
# run in order) — confirm this is the intended pair.
dt_sub = dt_models[(dt_models['a1'] == a1) & (dt_models['a2'] == a2)]
plt.figure(figsize=(16,9))
sbs.violinplot(data = dt_sub, x = 'fid', y='mse', hue='type_', cut=0, inner='quartile')
# Restrict the y-axis to [0, 1].
plt.ylim(0,1)

In [None]:
# Scatter of MSE against switchpoint for one function id, all algorithm pairs
# together, coloured by type_.
# NOTE(review): fid, a1 and a2 are not assigned in this cell; they are leftovers
# from an earlier loop. The data is filtered on fid only, yet the output file
# name also contains a1 and a2 — confirm the file name is what is intended.
dt_sub = dt_models[(dt_models['fid'] == fid)]
plt.figure(figsize=(16,9))
sbs.scatterplot(data = dt_sub, x = 'sp', y='mse', hue='type_')

plt.xlabel("Switchpoint")
plt.ylabel("Mean Square Error")
plt.tight_layout()

plt.savefig(f"MSE_Figures/F{fid}_{a1}_{a2}.pdf")

In [None]:
from sklearn.metrics import accuracy_score, f1_score

In [None]:
# Per-switchpoint classification quality for the 'Bool' result files:
# accuracy and F1 between 'Real' and 'Predicted', one record per distinct 'sp'.
records = []
for type_ in ['short', 'medium', 'long']:
    for fid in range(1,25):
        for a1 in algorithms:
            for a2 in algorithms:
                dt = pd.read_csv(f"RF_Results_norm/{type_}/Bool/F{fid}_{a1}_{a2}.csv", index_col=0)
                try:
                    for sp in np.unique(dt['sp']):
                        dt_one_sp = dt[dt['sp'] == sp]
                        records.append([type_, fid, a1, a2, sp, accuracy_score(dt_one_sp['Real'], dt_one_sp['Predicted']), f1_score(dt_one_sp['Real'], dt_one_sp['Predicted'])])
                except Exception as err:
                    # Best-effort: report the failing file and move on to the
                    # next one. Catching Exception rather than using a bare
                    # except lets KeyboardInterrupt stop the loop, and printing
                    # type_ and the error identifies what went wrong and where.
                    print(a1, a2, fid, type_, repr(err))

In [None]:
# Display the most recently loaded result frame (notebook inspection only).
dt

In [None]:
# Turn the collected records into a frame: one row per (type_, fid, a1, a2, sp)
# with its accuracy ('acc') and F1 score ('f1').
dt_models_bool = pd.DataFrame.from_records(records, columns=['type_', 'fid', 'a1', 'a2', 'sp', 'acc', 'f1'])

In [None]:
# Persist the classification metrics to CSV.
dt_models_bool.to_csv("dt_model_bool_acc_norm.csv")

In [None]:
# Quick look at function id 1: F1 score against switchpoint for all algorithm
# pairs together, coloured by type_. The figure is shown but not saved.
dt_sub = dt_models_bool[(dt_models_bool['fid'] == 1)]
plt.figure(figsize=(16,9))
sbs.scatterplot(data = dt_sub, x = 'sp', y='f1', hue='type_')

plt.xlabel("Switchpoint")
plt.ylabel("F1-score")
plt.tight_layout()



In [None]:
# Save one F1-vs-switchpoint scatter plot per function id and algorithm pair,
# coloured by type_, into F1_Figures_norm/.
for fid in range(1, 25):
    for a1 in algorithms:
        for a2 in algorithms:
            # Rows belonging to this (fid, a1, a2) combination.
            same_fid = dt_models_bool['fid'] == fid
            same_pair = (dt_models_bool['a1'] == a1) & (dt_models_bool['a2'] == a2)
            dt_sub = dt_models_bool[same_fid & same_pair]

            fig = plt.figure(figsize=(16, 9))
            ax = sbs.scatterplot(x='sp', y='f1', hue='type_', data=dt_sub)
            ax.set_xlabel("Switchpoint")
            ax.set_ylabel("F1-score")
            fig.tight_layout()

            fig.savefig(f"F1_Figures_norm/F{fid}_{a1}_{a2}.pdf")
            # Close the figure so it does not stay open across iterations.
            plt.close(fig)

In [None]:
# Violin plot of F1 score per function id for a single algorithm pair, split by type_.
# NOTE(review): a1 and a2 are not assigned in this cell; they hold whatever an
# earlier cell left behind (the last pair of the preceding loop if cells are
# run in order) — confirm this is the intended pair.
dt_sub = dt_models_bool[(dt_models_bool['a1'] == a1) & (dt_models_bool['a2'] == a2)]
plt.figure(figsize=(16,9))
sbs.violinplot(data = dt_sub, x = 'fid', y='f1', hue='type_', cut=0, inner='quartile')
# Restrict the y-axis to [0, 1].
plt.ylim(0,1)

In [None]:
# Reload the regression metrics from the CSV written earlier, so the plotting
# cells below can run without recomputing them.
dt_models = pd.read_csv("dt_model_acc_norm_v2.csv", index_col=0)

In [None]:
# Display the metrics frame (notebook inspection only).
dt_models

In [None]:
# Keep only the rows whose type_ is 'medium' for the box plot below.
dt_plot = dt_models[dt_models['type_'] == 'medium']

In [None]:
# Box plot of MSE per function id, one box per first algorithm (a1), saved to
# Paper_Figures/MSE_Overall.pdf.
plt.figure(figsize=(24,10))
sbs.boxplot(data=dt_plot, x='fid', y = 'mse', hue='a1')
plt.xlabel('Function ID')
plt.ylabel('MSE')
temp = plt.legend()
# Legend entries are renamed by position.
# NOTE(review): this assumes the hue levels appear in the order
# DifferentialEvolution, ConfiguredPSO, modcma — confirm, otherwise the short
# names end up on the wrong entries.
temp.get_texts()[0].set_text('DE')
temp.get_texts()[1].set_text('PSO')
temp.get_texts()[2].set_text('CMA-ES')
temp.set_title('First Algorithm')
plt.tight_layout()
plt.savefig("Paper_Figures/MSE_Overall.pdf")

In [None]:
# Keep only the rows whose type_ is 'medium' (same selection as the earlier
# dt_plot cell).
dt_plot = dt_models[dt_models['type_'] == 'medium']

In [None]:
# Box plot of MSE per function id, one box per first algorithm (a1), saved to
# Paper_Figures/MSE_Overall_norm_medium.pdf.
plt.figure(figsize=(24,10))
sbs.boxplot(data=dt_plot, x='fid', y = 'mse', hue='a1')
plt.xlabel('Function ID')
plt.ylabel('MSE')
temp = plt.legend()
# Legend entries are renamed by position.
# NOTE(review): this assumes the hue levels appear in the order
# DifferentialEvolution, ConfiguredPSO, modcma — confirm, otherwise the short
# names end up on the wrong entries.
temp.get_texts()[0].set_text('DE')
temp.get_texts()[1].set_text('PSO')
temp.get_texts()[2].set_text('CMA-ES')
temp.set_title('First Algorithm')
plt.tight_layout()
plt.savefig("Paper_Figures/MSE_Overall_norm_medium.pdf")

In [None]:
# Collect the 'Real' result files from RF_Results_long/ for all function ids
# and algorithm pairs into one frame, tagging each file's rows with its second
# algorithm in column 'a2'.
frames = []
for fid in range(1,25):
    for a1 in algorithms:
        for a2 in algorithms:
            dt_temp = pd.read_csv(f"RF_Results_long/Real/F{fid}_{a1}_{a2}.csv", index_col=0)
            dt_temp['a2'] = a2
            frames.append(dt_temp)
# DataFrame.append was removed in pandas 2.0 and re-copied the accumulated
# frame on every call; a single concat yields the same rows, order and index.
dt_all = pd.concat(frames)

In [None]:
# Subset for inspection: rows with a1 == 'modcma' and fid == 2.
dt_temp_small = dt_all[(dt_all['a1'] == 'modcma') & (dt_all['fid'] == 2)]


In [None]:
# Reshape to long format: 'Real' and 'Predicted' are stacked into a 'value'
# column, with 'variable' naming the source column, so both can be plotted as
# hue levels.
dt_molten = dt_all.melt(value_vars=['Real', 'Predicted'], id_vars = ['fid', 'sp', 'a1', 'a2'])

In [None]:
# Boxen plot comparing the distributions of the 'Real' and 'Predicted' values
# per function id. The figure is shown but not saved (savefig is commented out).
plt.figure(figsize=(24,10))
sbs.boxenplot(data=dt_molten, x='fid', y = 'value', hue='variable')
plt.xlabel('Function ID')
plt.ylabel('Benefit')
temp = plt.legend()
# temp.get_texts()[0].set_text('DE')
# temp.get_texts()[1].set_text('PSO')
# temp.get_texts()[2].set_text('CMA-ES')
# temp.set_title('First Algorithm')
plt.tight_layout()
# plt.savefig("Paper_Figures/MSE_Overall.pdf")

In [None]:
# One scatter plot per (function id, a1, a2): MSE against switchpoint,
# coloured by type_. Each figure is written to MSE_Figures/ and closed.
for fid in range(1,25):
    for a1 in algorithms:
        for a2 in algorithms:
            dt_sub = dt_models[(dt_models['fid'] == fid) & (dt_models['a1'] == a1) & (dt_models['a2'] == a2)]
            plt.figure(figsize=(16,9))
            sbs.scatterplot(data = dt_sub, x = 'sp', y='mse', hue='type_')

            plt.xlabel("Switchpoint")
            plt.ylabel("Mean Square Error")
            plt.tight_layout()

            plt.savefig(f"MSE_Figures/F{fid}_{a1}_{a2}.pdf")
            # Release the figure once saved: this loop creates one figure per
            # (fid, a1, a2) and pyplot keeps every unclosed figure alive.
            plt.close()

In [None]:
# Function id 8, type_ 'medium': MSE against switchpoint, coloured by the first
# algorithm (a1). The figure is shown but not saved (savefig is commented out).
fid = 8
dt_sub = dt_models[(dt_models['fid'] == fid) & (dt_models['type_'] == 'medium')]
plt.figure(figsize=(16,9))
sbs.scatterplot(data = dt_sub, x = 'sp', y='mse', hue='a1')

plt.xlabel("Switchpoint")
plt.ylabel("Mean Square Error")
plt.tight_layout()

# plt.savefig(f"MSE_Figures/F{fid}_{a1}_{a2}.pdf")

In [None]:
# Per-switchpoint regression quality for the 'Real' result files under
# RF_Results_umap/ (type_ 'long' only): MSE and R^2 between the 'Real' and
# 'Predicted' columns, one record per distinct value of 'sp'.
records = []
for type_ in ['long']:
    for fid in range(1,25):
        for a1 in algorithms:
            for a2 in algorithms:
                dt = pd.read_csv(f"RF_Results_umap/{type_}/Real/F{fid}_{a1}_{a2}.csv", index_col=0)
                # groupby splits the frame once (keys in ascending order, as
                # np.unique would give) instead of re-masking the whole frame
                # for every switchpoint.
                for sp, dt_one_sp in dt.groupby('sp'):
                    real = dt_one_sp['Real']
                    predicted = dt_one_sp['Predicted']
                    records.append([type_, fid, a1, a2, sp,
                                    mean_squared_error(real, predicted),
                                    r2_score(real, predicted)])

In [None]:
# Turn the collected records into a frame: one row per (type_, fid, a1, a2, sp)
# with its 'mse' and 'r2'.
dt_models_umap = pd.DataFrame.from_records(records, columns=['type_', 'fid', 'a1', 'a2', 'sp', 'mse', 'r2'])

In [None]:
# Box plot of MSE per function id for the UMAP results, one box per first
# algorithm (a1), saved to Paper_Figures/MSE_Overall_UMAP.pdf.
plt.figure(figsize=(24,10))
sbs.boxplot(data=dt_models_umap, x='fid', y = 'mse', hue='a1')
plt.xlabel('Function ID')
plt.ylabel('MSE')
temp = plt.legend()
# Legend entries are renamed by position.
# NOTE(review): this assumes the hue levels appear in the order
# DifferentialEvolution, ConfiguredPSO, modcma — confirm, otherwise the short
# names end up on the wrong entries.
temp.get_texts()[0].set_text('DE')
temp.get_texts()[1].set_text('PSO')
temp.get_texts()[2].set_text('CMA-ES')
temp.set_title('First Algorithm')
# Fix the y-axis to [0, 1.75]; values above that are cut off.
plt.ylim(0,1.75)
plt.tight_layout()
plt.savefig("Paper_Figures/MSE_Overall_UMAP.pdf")

In [None]:
# Per-switchpoint regression quality for the 'Real' result files under
# RF_Results_norm/: MSE and R^2 between the 'Real' and 'Predicted' columns,
# one record per distinct value of 'sp'.
records = []
for type_ in ['short', 'medium', 'long']:
    for fid in range(1,25):
        for a1 in algorithms:
            for a2 in algorithms:
                dt = pd.read_csv(f"RF_Results_norm/{type_}/Real/F{fid}_{a1}_{a2}.csv", index_col=0)
                # groupby splits the frame once (keys in ascending order, as
                # np.unique would give) instead of re-masking the whole frame
                # for every switchpoint.
                for sp, dt_one_sp in dt.groupby('sp'):
                    real = dt_one_sp['Real']
                    predicted = dt_one_sp['Predicted']
                    records.append([type_, fid, a1, a2, sp,
                                    mean_squared_error(real, predicted),
                                    r2_score(real, predicted)])

In [None]:
from sklearn.metrics import mean_absolute_error

In [None]:
# Per-switchpoint MSE for a single result file.
# NOTE(review): type_, fid, a1 and a2 are not assigned in this cell; they are
# leftovers from an earlier loop, so which file is read depends on what ran
# before — confirm the intended file.
dt = pd.read_csv(f"RF_Results_norm/{type_}/Real/F{fid}_{a1}_{a2}.csv", index_col=0)
mses = []
for sp in np.unique(dt['sp']):
    # Rows of this file recorded at switchpoint sp.
    dt_one_sp = dt[dt['sp'] == sp]
    mses.append(mean_squared_error(dt_one_sp['Real'], dt_one_sp['Predicted']))
    # records.append([type_, fid, a1, a2, sp, mean_squared_error(dt_one_sp['Real'], dt_one_sp['Predicted']), r2_score(dt_one_sp['Real'], dt_one_sp['Predicted'])])

In [None]:
def create_plot_traj_mse(fid, a1, a2, type_ = 'long'):
    """Plot real vs. predicted values over switchpoints with an MSE overlay.

    Reads ``RF_Results_norm/{type_}/Real/F{fid}_{a1}_{a2}.csv``, scatters the
    'Real' and 'Predicted' columns against 'sp' on the left axis, draws the
    per-switchpoint mean squared error as a black line on a twin right axis,
    and writes the figure to ``Paper_Figures/Trajectory_with_MSE/``. The
    figure is closed afterwards and nothing is returned.

    Parameters
    ----------
    fid : function id used in the input and output file names.
    a1, a2 : algorithm names used in the input and output file names.
    type_ : sub-directory of RF_Results_norm to read from (default 'long').
    """
    results = pd.read_csv(f"RF_Results_norm/{type_}/Real/F{fid}_{a1}_{a2}.csv", index_col=0)

    # One MSE value per distinct switchpoint, in ascending switchpoint order.
    switchpoints = np.unique(results['sp'])
    mse_per_sp = [
        mean_squared_error(rows['Real'], rows['Predicted'])
        for rows in (results[results['sp'] == sp] for sp in switchpoints)
    ]

    fig, left = plt.subplots(constrained_layout=True, figsize=(16,5))
    for column, label in (('Real', 'Real'), ('Predicted', 'Pred')):
        left.scatter(results['sp'], results[column], label=label)

    # The right-hand axis carries the MSE curve on a fixed 0..2 scale.
    right = left.twinx()
    right.plot(switchpoints, mse_per_sp, c='k', lw=4)
    right.set_ylim(0,2)
    right.set_yticks([0,0.5,1,1.5,2])

    left.legend()
    # Dotted reference lines at 0, 1 and -1 on the left axis.
    for level in (0, 1, -1):
        left.axhline(level, color='k', ls=':')
    left.set_ylabel("Relative Improvement")
    right.set_ylabel("Mean Squared Error")
    left.set_xlabel("Switchpoint")
    left.set_xlim(250, 9500)
    left.set_xticks([250, 2500, 5000, 7500, 9500])
    left.set_ylim(-1,1)

    plt.ticklabel_format()
    fig.savefig(f"Paper_Figures/Trajectory_with_MSE/F{fid}_{a1}_{a2}_{type_}.pdf")
    plt.close(fig)

In [None]:
# Single figure: fid=7, a1='DifferentialEvolution', a2='ConfiguredPSO', default type_ ('long').
create_plot_traj_mse(7, 'DifferentialEvolution', 'ConfiguredPSO')

In [None]:
# Single figure: fid=15, a1='ConfiguredPSO', a2='modcma', default type_ ('long').
create_plot_traj_mse(15, 'ConfiguredPSO', 'modcma')

In [None]:
# Algorithm list re-declared (same values as the earlier definition) so the
# cell below can run on its own.
algorithms = ['DifferentialEvolution', 'ConfiguredPSO', 'modcma']

In [None]:
# Generate and save the trajectory/MSE figure for every type_, function id
# (1..24) and ordered algorithm pair.
for type_ in ['short', 'medium', 'long']:
    for fid in range(1,25):
        for a1 in algorithms:
            for a2 in algorithms:
                create_plot_traj_mse(fid, a1, a2, type_)