""" This will read the COLVAR_MUTLI files for an umbrella sampling run, and plot out normalised histograms and an overlap matrix. """

import numpy as np
import matplotlib.pyplot as plt
from glob import glob
import pandas as pd

# One COLVAR_MULTI file per umbrella window (24 windows, indexed 0-23).
hills = [f"../raw_data/COLVAR_MULTI.{i}" for i in range(24)]

fig, ax = plt.subplots()

# Samples of column 1 for every window, in window order.
# NOTE(review): column 1 is presumably the DFG-loop DRMSD (see the x-axis
# label below) -- confirm against the COLVAR header.
data_series = []

# One colour per window, spread over the whole colormap. Sizing the table from
# the window count keeps colors[n] valid if the number of windows changes.
colors = plt.cm.turbo(np.linspace(0, 1, len(hills)))

for n, hill in enumerate(hills):
    # sep=r"\s+" is the supported spelling of the deprecated
    # delim_whitespace=True; lines starting with "#" are skipped as comments.
    data = pd.read_table(hill, header=None, sep=r"\s+", comment="#")
    data_series.append(data[1])

    # Density-normalised histogram, drawn as a line through the bin centres.
    values, x = np.histogram(data_series[-1], bins=25, density=True)
    bin_centers = 0.5 * (x[1:] + x[:-1])
    ax.plot(bin_centers, values, color=colors[n])

ax.set_xlabel("DFG-loop DRMSD / nm")
ax.set_ylabel("Normalized Histogram")
fig.savefig("histogram.pdf")
plt.show()


def overlap(series_1, series_2, n_edges=100):
    """Return the normalised histogram overlap between two sample series.

    Both series are histogrammed on one shared grid of ``n_edges`` equally
    spaced bin edges spanning their combined range. The result is
    ``2 * sum(min(h1, h2)) / (sum(h1) + sum(h2))``: 1.0 when the two
    histograms are identical and 0.0 when they share no occupied bin.

    Args:
        series_1: Array-like of numeric samples (list, ndarray, Series, ...).
        series_2: Array-like of numeric samples; it may have a different
            length from ``series_1``.
        n_edges: Number of bin edges of the shared grid, i.e. ``n_edges - 1``
            bins. Defaults to 100.

    Returns:
        The overlap as a float.

    Raises:
        ValueError: If either series is empty.
    """
    samples_1 = np.asarray(series_1, dtype=float).ravel()
    samples_2 = np.asarray(series_2, dtype=float).ravel()

    # Take the extrema per series: stacking both series into a single array
    # fails when they do not have the same number of samples.
    min_value = min(samples_1.min(), samples_2.min())
    max_value = max(samples_1.max(), samples_2.max())

    # Every sample has the same value: the two distributions coincide, and a
    # zero-width grid would otherwise produce NaN densities.
    if min_value == max_value:
        return 1.0

    bins = np.linspace(min_value, max_value, n_edges)
    hist1, _ = np.histogram(samples_1, bins=bins, density=True)
    hist2, _ = np.histogram(samples_2, bins=bins, density=True)
    shared = np.sum(np.minimum(hist1, hist2))
    return float(shared * 2 / (np.sum(hist1) + np.sum(hist2)))


# Symmetric window-by-window matrix of histogram overlaps.
n_windows = len(data_series)
overlap_matrix = np.zeros((n_windows, n_windows))

for i, series_i in enumerate(data_series):
    # Only the upper triangle (diagonal included) is computed; each value is
    # mirrored into the lower triangle.
    for j, series_j in enumerate(data_series[i:], start=i):
        overlap_value = overlap(series_i, series_j)
        overlap_matrix[i, j] = overlap_value
        overlap_matrix[j, i] = overlap_value

np.savetxt("overlap.dat", overlap_matrix, fmt="%1.3f")

# Draw on an explicitly created figure: relying on the implicit current figure
# would reuse an earlier figure whenever plt.show() has not closed it, as on
# non-interactive backends.
overlap_fig, overlap_ax = plt.subplots()
# NOTE(review): interpolation=None selects Matplotlib's configured default;
# the string "none" is what disables interpolation -- confirm which is wanted.
image = overlap_ax.imshow(overlap_matrix, interpolation=None)
overlap_fig.colorbar(image, ax=overlap_ax)
overlap_ax.set_xlabel("Umbrella Window")
overlap_ax.set_ylabel("Umbrella Window")
overlap_fig.savefig("overlap.pdf")
plt.show()
