This file can be used to reproduce Figure 9 from the paper (and equivalent figures). For details on the data used, please see the main reproducibility document.

In [None]:
import glob
import os
import pickle
from functools import partial

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sbs
from scipy.stats import kendalltau, rankdata

# Font size applied to every figure created in this notebook
font = dict(size=24)
plt.rc('font', **font)

# Font-requirement for GECCO: PDF and PS output must use font type 42
import matplotlib
matplotlib.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})

In [None]:
# Algorithm names as they appear in the result file names (used both as A1 and as A2)
algorithms = [
    'DifferentialEvolution',
    'ConfiguredPSO',
    'modcma',
]

In [None]:
# Collect one row per (function id, A1, A2) combination for the 'short' setting.
# Each row is the stored importance array followed by its three labels; np.append
# puts numbers and strings into a single array, so the values are converted back
# to numbers further down.
records = []
for fid in range(1, 25):
    for a1 in algorithms:
        for a2 in algorithms:
            feat_imps = np.load(f"RF_Results_norm/short/Real/Feats_F{fid}_{a1}_{a2}.npy")
            records.append(
                np.append(feat_imps, [fid, a1, a2])
            )

In [None]:
# One row per loaded file; columns are still positional integers at this point
dt_feat_imps = pd.DataFrame.from_records(data=records)

In [None]:
# `feat_names` was never defined at notebook level, so this cell failed with a
# NameError on a fresh kernel. Derive it here from the columns of one result
# table, using the same file pattern and column slice as create_feat_imp_plot.
dt_temp = pd.read_csv("RF_Results_norm/short/Real/F9_modcma_modcma.csv", index_col=0)
feat_names = np.array(dt_temp.columns[9:-5])

# Map the positional column indices to the feature names followed by the three label columns
dt_feat_imps = dt_feat_imps.rename(columns={idx : val for idx,val in enumerate(np.append(feat_names, ['fid', 'A1', 'A2']))})

In [None]:
# Long format: one row per (fid, A1, A2, feature) with the importance in 'value'
dt_molt = pd.melt(dt_feat_imps, id_vars=['fid', 'A1', 'A2'])

In [None]:
# The values were stored as strings alongside the labels; convert them back to numbers
dt_molt['value'] = pd.to_numeric(dt_molt['value'], errors='raise')

In [None]:
def create_feat_imp_plot(type_):
    """Plot the stored feature importances for one setting and save them as a PDF.

    For every function id 1..24 and every ordered pair of entries in
    ``algorithms``, the array
    ``RF_Results_norm/{type_}/Real/Feats_F{fid}_{a1}_{a2}.npy`` is loaded.
    Combinations that cannot be loaded are reported and skipped. The collected
    values are drawn as a strip plot (one x position per feature, coloured by
    ``A2``) and written to ``Paper_Figures/Feat_imp_{type_}.pdf``.

    Parameters
    ----------
    type_ : str
        Sub-directory of ``RF_Results_norm`` to read from; the notebook calls
        this with 'short', 'medium' and 'long'.

    Returns
    -------
    None
        If no importance file could be loaded, a message is printed and no
        figure is produced.
    """
    records = []
    for fid in range(1,25):
        for a1 in algorithms:
            for a2 in algorithms:
                try:
                    feat_imps = np.load(f"RF_Results_norm/{type_}/Real/Feats_F{fid}_{a1}_{a2}.npy")
                    # Numbers and labels end up in one string array; converted back below
                    records.append(np.append(feat_imps, [fid, a1, a2]))
                except Exception as err:
                    # Best effort: report the failing combination and its cause, then
                    # continue. `Exception` (not a bare except) so that Ctrl-C still works.
                    print((fid, a1, a2), repr(err))

    if not records:
        # Without this guard the melt below fails with a KeyError on the missing id columns
        print(f"No feature importances could be loaded for '{type_}'; skipping plot.")
        return

    dt_feat_imps = pd.DataFrame.from_records(records)

    # The .npy files carry no names; take them from the columns of one result table
    dt_temp = pd.read_csv(f"RF_Results_norm/{type_}/Real/F9_modcma_modcma.csv", index_col=0)
    feat_names = np.array(dt_temp.columns[9:-5])

    dt_feat_imps = dt_feat_imps.rename(columns={idx : val for idx,val in enumerate(np.append(feat_names, ['fid', 'A1', 'A2']))})
    dt_molt = dt_feat_imps.melt(id_vars=['fid', 'A1', 'A2'])
    dt_molt['value'] = pd.to_numeric(dt_molt['value'])

    plt.figure(figsize=(25,10))
    sbs.stripplot(data=dt_molt, x='variable', y='value', hue='A2')
    plt.xticks(fontsize=10, rotation=90)
    plt.xlabel("Feature Name")
    plt.ylabel("Feature Importance")
    plt.ylim(0)
    plt.tight_layout()
    # savefig does not create missing directories
    os.makedirs("Paper_Figures", exist_ok=True)
    plt.savefig(f"Paper_Figures/Feat_imp_{type_}.pdf")

In [None]:
# Produce the feature-importance figure for each of the three settings
for type_ in ('short', 'medium', 'long'):
    create_feat_imp_plot(type_)

In [None]:
import shap

In [None]:
import pickle

In [None]:
def create_shap_plot(type_):
    """Plot the mean absolute SHAP value per feature for one setting and save it as a PDF.

    For every function id 1..24 and every ordered pair of entries in
    ``algorithms``, the pickled object in
    ``RF_Results_norm/{type_}/Real/Shaps_F{fid}_{a1}_{a2}.pkl`` is loaded and
    ``abs(shap_vals.values)`` is averaged over its first axis. Combinations that
    cannot be loaded are reported and skipped. The result is drawn as a strip
    plot (one x position per feature, coloured by ``A2``) and written to
    ``Paper_Figures/Shap_vals_{type_}.pdf``.

    Parameters
    ----------
    type_ : str
        Sub-directory of ``RF_Results_norm`` to read from; the notebook calls
        this with 'short', 'medium' and 'long'.

    Returns
    -------
    None
        If no SHAP file could be loaded, a message is printed and no figure is
        produced.
    """
    records = []
    feat_names = None
    for fid in range(1,25):
        for a1 in algorithms:
            for a2 in algorithms:
                try:
                    # NOTE(security): pickle.load can execute arbitrary code; only
                    # use this on result files you generated or otherwise trust.
                    with open(f"RF_Results_norm/{type_}/Real/Shaps_F{fid}_{a1}_{a2}.pkl", "rb") as f:
                        shap_vals = pickle.load(f)
                    # Names are taken from the most recently loaded file, as before
                    feat_names = shap_vals.feature_names
                    records.append(np.append(np.mean(np.abs(shap_vals.values), axis=0), [fid, a1, a2]))
                except Exception as err:
                    # Best effort: report the failing combination and its cause, then
                    # continue. `Exception` (not a bare except) so that Ctrl-C still works.
                    print((fid, a1, a2), repr(err))

    if not records:
        # Previously this case crashed on the unbound local `shap_vals`
        print(f"No SHAP values could be loaded for '{type_}'; skipping plot.")
        return

    dt_feat_imps = pd.DataFrame.from_records(records)

    dt_feat_imps = dt_feat_imps.rename(columns={idx : val for idx,val in enumerate(np.append(feat_names, ['fid', 'A1', 'A2']))})
    dt_molt = dt_feat_imps.melt(id_vars=['fid', 'A1', 'A2'])
    dt_molt['value'] = pd.to_numeric(dt_molt['value'])

    plt.figure(figsize=(25,10))
    sbs.stripplot(data=dt_molt, x='variable', y='value', hue='A2')
    plt.xticks(fontsize=10, rotation=90)
    plt.xlabel("Feature Name")
    plt.ylabel("Shapley Value")
    plt.ylim(0)
    plt.tight_layout()
    # savefig does not create missing directories
    os.makedirs("Paper_Figures", exist_ok=True)
    plt.savefig(f"Paper_Figures/Shap_vals_{type_}.pdf")

In [None]:
# Produce the SHAP figure for each of the three settings
for type_ in ('short', 'medium', 'long'):
    create_shap_plot(type_)

Equivalent figure, but restricted to cases where the model performs well (mean MSE below 0.5). Not included in the paper.

In [None]:
# Table of model results; the cells below read its 'type_', 'fid', 'a1', 'a2' and 'mse' columns
dt_models = pd.read_csv(filepath_or_buffer="dt_model_acc_norm_v2.csv", index_col=0)

In [None]:
# Pick one example combination to inspect by hand
fid, type_ = 10, 'long'
a1, a2 = algorithms[0], algorithms[1]

In [None]:
# Rows of the model table that belong to the selected setting, function and algorithm pair
dt_items = dt_models.loc[
    (dt_models['type_'] == type_)
    & (dt_models['fid'] == fid)
    & (dt_models['a1'] == a1)
    & (dt_models['a2'] == a2)
]

In [None]:
# Mean MSE over the selected rows (displayed as the cell output)
dt_items['mse'].mean()

In [None]:
def create_shap_plot(type_, mse_threshold=0.5):
    """Plot mean absolute SHAP values, restricted to combinations with a low mean MSE.

    Same figure as the earlier ``create_shap_plot`` in this notebook, which this
    definition replaces once the cell is run. A (function id, A1, A2)
    combination is only included when the mean of the ``mse`` column over its
    rows in ``dt_models`` is below ``mse_threshold``. The figure is written to
    ``Paper_Figures/Shap_vals_{type_}_good_only.pdf``.

    Parameters
    ----------
    type_ : str
        Sub-directory of ``RF_Results_norm`` to read from and value matched
        against the ``type_`` column of ``dt_models``; the notebook calls this
        with 'short', 'medium' and 'long'.
    mse_threshold : float, optional
        Upper bound (exclusive) on the mean MSE for a combination to be
        included. Defaults to 0.5, the value that was previously hard-coded.

    Returns
    -------
    None
        If no combination passes the filter and loads successfully, a message
        is printed and no figure is produced.
    """
    records = []
    feat_names = None
    for fid in range(1,25):
        for a1 in algorithms:
            for a2 in algorithms:
                try:
                    dt_items = dt_models[(dt_models['type_'] == type_) & (dt_models['fid'] == fid) & (dt_models['a1'] == a1) & (dt_models['a2'] == a2)]
                    # An empty selection gives a NaN mean, which fails the comparison
                    # and is therefore reported as "too bad" as well.
                    if np.mean(dt_items['mse']) < mse_threshold:
                        # NOTE(security): pickle.load can execute arbitrary code; only
                        # use this on result files you generated or otherwise trust.
                        with open(f"RF_Results_norm/{type_}/Real/Shaps_F{fid}_{a1}_{a2}.pkl", "rb") as f:
                            shap_vals = pickle.load(f)
                        # Names are taken from the most recently loaded file, as before
                        feat_names = shap_vals.feature_names
                        records.append(np.append(np.mean(np.abs(shap_vals.values), axis=0), [fid, a1, a2]))
                    else:
                        print("too bad")
                except Exception as err:
                    # Best effort: report the failing combination and its cause, then
                    # continue. `Exception` (not a bare except) so that Ctrl-C still works.
                    print((fid, a1, a2), repr(err))

    if not records:
        # Previously this case crashed on the unbound local `shap_vals`
        print(f"No sufficiently accurate models with SHAP values found for '{type_}'; skipping plot.")
        return

    dt_feat_imps = pd.DataFrame.from_records(records)

    dt_feat_imps = dt_feat_imps.rename(columns={idx : val for idx,val in enumerate(np.append(feat_names, ['fid', 'A1', 'A2']))})
    dt_molt = dt_feat_imps.melt(id_vars=['fid', 'A1', 'A2'])
    dt_molt['value'] = pd.to_numeric(dt_molt['value'])

    plt.figure(figsize=(25,10))
    sbs.stripplot(data=dt_molt, x='variable', y='value', hue='A2')
    plt.xticks(fontsize=10, rotation=90)
    plt.xlabel("Feature Name")
    plt.ylabel("Shapley Value")
    plt.ylim(0)
    plt.tight_layout()
    # savefig does not create missing directories
    os.makedirs("Paper_Figures", exist_ok=True)
    plt.savefig(f"Paper_Figures/Shap_vals_{type_}_good_only.pdf")

In [None]:
# Load the SHAP object of the example combination selected above ('long' setting).
# pickle can execute arbitrary code on load, so only open trusted result files.
with open(f"RF_Results_norm/long/Real/Shaps_F{fid}_{a1}_{a2}.pkl", mode="rb") as f:
    shap_vals = pickle.loads(f.read())

In [None]:
# Number of feature names stored with the loaded SHAP object (displayed as the cell output)
len(shap_vals.feature_names)

In [None]:
# Produce the figure for each of the three settings, using the most recently
# defined create_shap_plot
for type_ in ('short', 'medium', 'long'):
    create_shap_plot(type_)