from netCDF4 import Dataset
import h5py
import numpy as np
import time

def parseFile(file_lines):
    """Extract beta, plaquette and Polyakov-loop values from log lines.

    Parameters
    ----------
    file_lines : iterable of str
        Lines of a simulation output file. Only lines starting with one of
        the three recognised prefixes are used; all others are ignored.

    Returns
    -------
    beta : float or None
        Value after the first ``=`` on the last ``[INIT][0]beta`` line, or
        ``None`` if no such line is present.
    plaquettes : numpy.ndarray
        One value per ``[MAIN][0]Plaquette`` line (second space-separated
        token), in file order.
    polyakovs : numpy.ndarray
        One value per ``[FUND_POLYAKOV][0]Polyakov direction 0`` line (fifth
        space-separated token), in file order.

    Raises
    ------
    ValueError, IndexError
        If a recognised line does not contain a parsable number at the
        expected position.
    """
    beta = None
    plaquettes = []
    polyakovs = []
    for line in file_lines:
        # The prefixes are mutually exclusive, so at most one branch applies.
        if line.startswith('[INIT][0]beta'):
            beta = float(line.split("=")[1])
        elif line.startswith('[MAIN][0]Plaquette'):
            plaquettes.append(float(line.split(" ")[1]))
        elif line.startswith('[FUND_POLYAKOV][0]Polyakov direction 0'):
            polyakovs.append(float(line.split(" ")[4]))
    return beta, np.array(plaquettes), np.array(polyakovs)

# Spatial lattice extents, stored as floats.
Ls = np.array([12.0, 16.0, 20.0, 24.0])

# Temporal lattice extents; bs[i] lists the beta values used with Nts[i].
Nts = [4, 5, 6]

bs = [
    # Nt = 4
    np.array([
        2.200, 2.210, 2.220, 2.230, 2.240, 2.250, 2.260, 2.270,
        2.275, 2.280, 2.285, 2.290, 2.295, 2.298, 2.299, 2.300,
        2.301, 2.302, 2.305, 2.310, 2.315, 2.320, 2.325, 2.330,
        2.340, 2.350, 2.360, 2.370, 2.380, 2.390, 2.400,
    ]),
    # Nt = 5
    np.array([
        2.290, 2.300, 2.310, 2.320, 2.330, 2.340, 2.345, 2.350,
        2.355, 2.360, 2.365, 2.369, 2.370, 2.371, 2.372, 2.375,
        2.380, 2.385, 2.390, 2.395, 2.400, 2.410, 2.420, 2.430,
        2.440, 2.450,
    ]),
    # Nt = 6
    np.array([
        2.330, 2.340, 2.350, 2.360, 2.370, 2.380, 2.390, 2.400,
        2.405, 2.410, 2.415, 2.420, 2.425, 2.426, 2.427, 2.428,
        2.430, 2.435, 2.440, 2.445, 2.450, 2.455, 2.460, 2.470,
        2.480, 2.490, 2.500, 2.510, 2.520, 2.530,
    ]),
]

def _read_lines(path):
    """Return the text of the file at ``path`` split on newline characters."""
    # The context manager closes the handle as soon as the text has been read;
    # a bare open(path).read() leaves closing to the garbage collector.
    with open(path) as handle:
        return handle.read().split("\n")


def _load_observables(paths):
    """Parse every output file in ``paths`` with ``parseFile``.

    Returns a pair ``(plaquettes, polyakovs)`` of arrays built with one row
    per file, in the order of ``paths``. The beta value that ``parseFile``
    also extracts is discarded: the betas written to the datasets are taken
    from ``bs``.
    """
    plaquette_rows = []
    polyakov_rows = []
    for path in paths:
        _, plaq_series, poly_series = parseFile(_read_lines(path))
        plaquette_rows.append(plaq_series)
        polyakov_rows.append(poly_series)
    return np.array(plaquette_rows), np.array(polyakov_rows)


def _load_persistence_images(path):
    """Return the whole 'persistence_images' dataset of the HDF5 file at ``path``."""
    with h5py.File(path, 'r') as hf:
        return hf['persistence_images'][:]


# Measurements kept from each long run: every 100th entry starting at index 99
# and stopping before index 19999 (the "199 configurations per beta").
kept_steps = slice(99, 19999, 100)

# One netCDF file is written per (Nt, Ns) pair. Each pair is read, combined and
# written before the next one is touched, so only one lattice size is held in
# memory at a time.
for n_t, betas_nt in zip(Nts, bs):
    for spatial_extent in Ls:
        n_s = int(spatial_extent)
        run_dir = "raw_data/Nt={}_output_files/".format(n_t)

        # the output files for the majority of the data (199 configurations per beta)
        main_paths = [
            run_dir + "{0}x{1}x{1}x{1}/output_file_{2:.3f}".format(n_t, n_s, b)
            for b in betas_nt
        ]
        # the output files for a single configuration per beta to round the total up to 200
        single_paths = [
            run_dir + "singles/{}_{}_output_file_{:.3f}".format(n_t, n_s, b)
            for b in betas_nt
        ]

        plaq_main, poly_main = _load_observables(main_paths)
        plaq_single, poly_single = _load_observables(single_paths)

        # compute actions from the average plaquette values:
        # action = -6 * Nt * Ns^3 * plaquette
        volume_factor = 6 * n_t * (spatial_extent ** 3)
        acts = np.concatenate(
            [
                volume_factor * (-plaq_main[:, kept_steps]),
                (volume_factor * (-plaq_single[:, 0]))[:, np.newaxis],
            ],
            axis=1,
        )

        # combine polyakov loops the same way: subsampled long run + the single extra step
        polys = np.concatenate(
            [poly_main[:, kept_steps], poly_single[:, 0][:, np.newaxis]],
            axis=1,
        )

        # load the persistence images (199 per beta, then 1 per beta) and
        # join them along the Monte Carlo step axis
        pis_stem = "raw_data/Nt={0}_pis/pis_Nt={0}_Ns={1}".format(n_t, n_s)
        pis = np.concatenate(
            [
                _load_persistence_images(pis_stem + ".h5"),
                _load_persistence_images(pis_stem + "_singles.h5"),
            ],
            axis=1,
        )

        # set up the netCDF4 dataset and save data; the with-block closes
        # (and thereby flushes) the file even if a write below raises
        name = "Nt=" + str(n_t) + "_Ns=" + str(n_s) + "_pis_actions_polyakovs.nc"
        with Dataset(name, "w", format="NETCDF4") as rootgrp:
            rootgrp.description = ("Dataset containing computed persistence images, actions and polyakov loops for SU(2) lattice gauge configurations generated at different values of beta on a " + str(n_t) + "x" + str(n_s) + "^3 lattice.")
            rootgrp.history = "Created " + time.ctime(time.time())
            rootgrp.source = "Actions and Polyakov loops generated using HiRep software and persistence images by custom software for project 'Probing center vortices and deconfinement in SU(2) lattice gauge theory with persistent homology' by N. Sale, B. Lucini, and J. Giansiracusa"

            # set up dimensions
            rootgrp.createDimension("beta", len(betas_nt))
            rootgrp.createDimension("monte_carlo_step", pis.shape[1])
            rootgrp.createDimension("homology_degree", 4)
            rootgrp.createDimension("birth", 25)
            rootgrp.createDimension("persistence", 25)

            # set up variables (the *_var names keep them distinct from the data arrays)
            float64 = np.dtype('float64')
            betas_var = rootgrp.createVariable("betas", float64, ("beta",))
            births_var = rootgrp.createVariable("births", float64, ("birth",))
            persistences_var = rootgrp.createVariable("persistences", float64, ("persistence",))
            pis_var = rootgrp.createVariable("persistence_images", float64, ("beta", "monte_carlo_step", "homology_degree", "birth", "persistence"))
            pis_var.description = "25x25 persistence images obtained using linear weighting and sigma=5% of a pixel from persistence diagrams generated using the center vortex filtration"
            actions_var = rootgrp.createVariable("actions", float64, ("beta", "monte_carlo_step"))
            polyakovs_var = rootgrp.createVariable("polyakovs", float64, ("beta", "monte_carlo_step"))
            polyakovs_var.description = "Polyakov loops that wind round the lattice in the time direction"

            # copy across data; the birth/persistence coordinates are 25
            # evenly spaced values with step 2/25 starting at -1 and 0
            betas_var[:] = betas_nt
            births_var[:] = np.arange(-1, 1, 2/25)
            persistences_var[:] = np.arange(0, 2, 2/25)
            pis_var[:] = pis
            actions_var[:] = acts
            polyakovs_var[:] = polys

        # leaving the with-block has saved the file to disc
        print("packaged data for Nt=" + str(n_t) + " and Ns=" + str(n_s))
print("Done")