#! /usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Read plate reader data, subtract background, perform path length correction, plot the results, rearrange data table
structures and finally store them in a tab separated csv file.

See options via data_toolbox.py -h

Author: Niels Krausch
"""

try:
    import glob
    import logging
    import os
    import sys
    import pyblake2
    import io

    import configargparse

    # Import matplotlib, but turn plotter off to not require a graphical connection
    import matplotlib as mpl

    mpl.use('Agg')

    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    from scipy import optimize
except ModuleNotFoundError as e:
    print("Important modules not installed. Please install the required modules with: pip install",
          str(e).split()[-1].strip("'"))
    sys.exit(2)

# Configure the root logger once for the whole script: time stamp, level name
# (padded/truncated to exactly five characters) and the message itself.
logging.basicConfig(format="%(asctime)s [%(levelname)-5.5s]  %(message)s", level=logging.INFO)

# Turn interactive plotting off to not require a graphical connection
plt.ioff()
# The Agg backend was already requested through mpl.use('Agg') in the import
# section; this selects the same backend again through pyplot.
plt.switch_backend('agg')

def self_log():
    """Log the module name, the absolute location and the BLAKE2b checksum of this script.

    The checksum is computed over the script file itself, which makes it possible to tell
    from a log file exactly which version of the script produced it.

    :return: None
    """
    # Function-scope import: ``hashlib.blake2b`` is the standard library implementation of
    # BLAKE2 (available since Python 3.6), so the hash no longer depends on the third-party
    # ``pyblake2`` package.
    import hashlib

    def b2sum(src, digest_size_in_bytes=64, length=io.DEFAULT_BUFFER_SIZE):
        """Calculates a BLAKE2 hash sum of a file, given as file name.

        :param src: Path of the file to hash.
        :param digest_size_in_bytes: Size of the digest in bytes.
        :param length: Number of bytes read from the file per chunk.
        :return: The b2sum as hexadecimal string.
        """
        b2 = hashlib.blake2b(digest_size=digest_size_in_bytes)
        with open(src, mode="rb") as fd:
            # Read in chunks so that the file never has to fit into memory at once
            for chunk in iter(lambda: fd.read(length), b''):
                b2.update(chunk)
        return b2.hexdigest()

    path = os.path.abspath(__file__)
    logger = logging.getLogger(__name__)
    logger.info(f"Hi, this is: '{__name__}'")
    logger.info("I am located here:")
    logger.info(path)
    logger.info("My b2sum hash is:")
    logger.info(b2sum(path))
self_log()

## end of header


class Data:
    """Class for obtaining and storing data"""

    def __init__(self):
        """
        Set up all containers, flags and reference spectra with their default values.

        Besides initialising attributes this sets the level of the module logger to INFO,
        tries to activate the 'ggplot' plot style and creates the output folders "Graphs"
        and "csv_files" in the current working directory if they do not exist yet.
        """

        # Add logging
        logger = logging.getLogger(__name__)
        logger.setLevel(logging.INFO)

        # define list of dataframes
        self.df_list = []

        # define list of files
        self.file_list = []

        # define data frame holding the data of all read files (filled by read_data)
        self.df_complete = pd.DataFrame()

        # define list of pathlength correction dataframes
        self.diff_lst = []

        # define rearranged dataframe
        self.df_rearr = pd.DataFrame(
            columns=["Time_Point", "Unique_Sample_ID", "Wavelength", "Abs", "Replicate", "Is_neg_ctrl", "Condition_Name",
                     "Description"], data=None)

        # define vector with blank values
        self.blank_vals = pd.DataFrame()

        # define if path lengths should be corrected or only plotted
        self._plot_pathlength = True

        # define time points of measurement
        self.timepoints = np.array([])

        # define if plots should be drawn from grouped data
        self.number_of_replicates = 1

        # define data frame for mean values
        self.df_mean = pd.DataFrame(
            columns=["Time_Point", "Unique_Sample_ID", "Wavelength", "Abs", "Is_neg_ctrl", "Condition_Name", "Description"],
            data=None)

        # define data frame for std values
        self.df_std = pd.DataFrame(
            columns=["Time_Point", "Unique_Sample_ID", "Wavelength", "Abs", "Is_neg_ctrl", "Condition_Name", "Description"],
            data=None)

        # define Thymidine standard Abs. (one value per nm from 260 to 320 nm)
        self._dT_short = pd.Series(index=np.arange(260, 321, 1), data=np.array(
            [0.34294881, 0.35104545, 0.35877637, 0.3674122, 0.37306622, 0.37927818, 0.38159658, 0.3838657, 0.38395245,
             0.38206777, 0.37890905, 0.37412931, 0.36685426, 0.35973397, 0.34498824, 0.33354408, 0.31691367, 0.30171548,
             0.28423285, 0.26868426, 0.24593206, 0.22801229, 0.20742373, 0.18761928, 0.16765667, 0.14921301, 0.12843222,
             0.11501664, 0.09854441, 0.08109583, 0.05931655, 0.04870929, 0.03925187, 0.02989993, 0.02045782, 0.01485991,
             0.01018223, 0.00693676, 0.00361987, 0.001264, -0.00107313, -0.00143884, -0.00278431, -0.003429,
             -0.00351574, -0.00486121, -0.00505344, -0.0053324, -0.00470304, -0.00470304, -0.00524566, -0.00543788,
             -0.00561137, -0.00561137, -0.00650439, -0.00543788, -0.00543788, -0.00561137, -0.00561137, -0.00599582,
             -0.00634622]), name="Thymidine")

        # define Thymine standard Abs. (one value per nm from 260 to 320 nm)
        self._T_short = pd.Series(index=np.arange(260, 321, 1), data=np.array(
            [0.18503523, 0.19011903, 0.1960318, 0.20251454, 0.20795557, 0.21436909, 0.21805396, 0.22277542, 0.22728925,
             0.23186035, 0.2338412, 0.23677748, 0.24054275, 0.2415272, 0.24271926, 0.24552909, 0.24579397, 0.24798168,
             0.24914474, 0.24903533, 0.25084876, 0.25098717, 0.25249552, 0.25152813, 0.25136146, 0.25136146, 0.24992232,
             0.25112559, 0.25017014, 0.24907629, 0.24668095, 0.24569651, 0.2425816, 0.23862067, 0.23184991, 0.22711139,
             0.21979968, 0.2138579, 0.20538312, 0.19585469, 0.18075877, 0.17023394, 0.15961165, 0.14824079, 0.13468222,
             0.11989137, 0.10568244, 0.09375867, 0.08255448, 0.06876003, 0.05543733, 0.04444076, 0.03535582, 0.02729553,
             0.01940191, 0.01305759, 0.00793359, 0.00485889, 0.00229689, -0.00033431, -0.00372529]), name="Thymine")

        # define substrate standard Abs.
        # An explicit dtype avoids the pandas warning about the changing default dtype of empty Series.
        self.subs = pd.Series(dtype="float64")

        # define product standard Abs.
        self.prod = pd.Series(dtype="float64")

        # define list of conversion rates
        self.conv_rates = pd.DataFrame(columns=["Condition_Name", "Time_Point", "Par_a", "Par_b", "Variance_a", "Variance_b",
                                                "Covariance"], data=None)

        # define if raw data files should be plotted
        self._plot_raw_files = False

        # set initial concentration of the substrates
        self.initial_concs = np.array([], dtype="float")

        # data frame with concentrations of substrate and product as well as variances
        self.concs = []

        # flag for loading own reference standards
        self._standards = False

        # flag for switching to standard preparation mode
        self.prep = False

        # define arrays for concentrations of the standard substrate
        self.subs_conc = np.array([])

        # define arrays for concentrations of the standard product
        self.prod_conc = np.array([])

        # set file extension of graphs
        self.graph_file_ext = ".png"

        # define number of experimental conditions
        self.number_of_conditions = 0

        # define names of the experimental conditions; initialised here so that the
        # attribute exists even if parse_args never derives it from the meta data
        self.names_of_conditions = []

        # Try to use ggplot style
        try:
            plt.style.use('ggplot')
        except OSError:
            # matplotlib reports an unknown style as OSError; FileNotFoundError is only a
            # subclass of it and therefore would not catch that error.
            logger.warning("'ggplot' style not found, will use default")

        # Create folder graphs (exist_ok avoids the race between checking and creating)
        os.makedirs("Graphs", exist_ok=True)

        # create folder for csv files
        os.makedirs("csv_files", exist_ok=True)

        # set flag for debugging
        self._debug = False

        # set flag for no plotting at all
        self._no_plot = False

        # store meta data
        self._meta_data = pd.DataFrame()

    def parse_args(self, argv=None):
        """
        Parse the command line / config file options, load the meta data and derive settings from it.

        Time points, condition names, number of conditions and number of replicates are taken
        from the meta data unless the corresponding option is given explicitly.

        :param argv: List of argument strings; defaults to ``sys.argv[1:]``.
        :return: None
        :raises Exception: If standard preparation is requested without (equally many) substrate
            and product concentrations, if no meta data file is given, or if the number of
            conditions can neither be taken from the options nor from the meta data.
        """

        logger = logging.getLogger(__name__)

        if argv is None:
            argv = sys.argv[1:]

        p = configargparse.ArgParser(default_config_files=['~/Uni/test_config.txt'])
        p.add_argument('-c', '--my-config', metavar="config_file", is_config_file=True, help='Config file')
        p.add_argument('--correct_pathlength', dest='plotpath', help='Correct pathlength instead of only plotting them',
                       action='store_false')
        p.add_argument('--number_of_replicates', metavar="number_of_replicates", dest='number_of_replicates',
                       help='Plot replicates as averaged points instead of individual points.')
        p.add_argument('--t', '--timepoints', metavar="timepoints", nargs="+", help='Timepoints of measurement')
        p.add_argument('--plot_raw', dest='plotraw', help='Plot raw datafiles', action='store_true')
        p.add_argument('--i', '--init_conc', metavar="initial_concentration", dest='conc', nargs="+",
                       help='Define the starting condition of the substrate [mM]')
        p.add_argument('--standard', dest='standard', help='Use own standard concentrations as reference for '
                                                           'curve fitting. Must be supplied as csv files.',
                       action='store_true')
        p.add_argument('--prepare_standards', dest='prep',
                       help='If set, reads data to prepare standard absorption files which can be used in '
                            'subsequent analysis', action='store_true')
        p.add_argument('--sc', metavar="concentrations_of_substrates",
                       help='Define concentrations of the standard substrates', nargs='+')
        p.add_argument('--pc', metavar="concentrations_of_products",
                       help='Define concentrations of the standard products', nargs='+')
        p.add_argument('--svg', dest='svg', help='Plot graphs as svg instead of png.', action='store_true')
        p.add_argument('--v', dest='debug', help='Show verbose information.', action='store_true')
        p.add_argument('--cond', dest='cond', metavar="number_of_conditions",
                       help='Define number of experimental conditions')
        p.add_argument('--np', dest='no_plot', help='Disable plotting', action='store_true')
        p.add_argument('--meta_data', metavar="metadata_file", dest='meta_data', help='Config file for meta data')

        args = p.parse_args(argv)

        if args.debug:
            logger.setLevel(logging.DEBUG)

        self._plot_pathlength = args.plotpath

        self._plot_raw_files = args.plotraw
        if args.conc:
            self.initial_concs = np.array(args.conc, dtype="float")
        self._standards = args.standard
        self.prep = args.prep

        # Exit program if concentrations not given
        if args.prep and not (args.sc and args.pc):
            error_message = "Please specify input concentrations by using --sc and --pc!"
            logger.critical(error_message)
            raise Exception(error_message)

        if args.prep and len(args.sc) != len(args.pc):
            error_message = "Please specify equal numbers of concentrations for substrate and product!"
            logger.critical(error_message)
            raise Exception(error_message)

        # NOTE: without --sc / --pc these become 0-dimensional NaN arrays (np.array(None, dtype=float))
        self.subs_conc = np.array(args.sc, dtype="float")
        self.prod_conc = np.array(args.pc, dtype="float")

        if args.svg:
            self.graph_file_ext = ".svg"

        if args.no_plot:
            self._no_plot = True

        if not args.meta_data:
            error_message = "Please link the metadata file!"
            logger.critical(error_message)
            raise Exception(error_message)

        self._meta_data = pd.read_csv(args.meta_data)

        ## extract the meta data

        if not args.t:
            logger.debug("Setting the timepoints automatically from entries found in metadata...")
            all_timepoints = self._meta_data.Time_Point.unique()
            self.timepoints = np.array(all_timepoints, dtype="float")
        else:
            self.timepoints = np.array(args.t, dtype="float")

        # The condition names are needed for grouping and plotting, so they are collected
        # whenever the meta data provides them - also when --cond overrides the number.
        names_error = None
        try:
            # remove every entry with "blank" in its name
            self.names_of_conditions = [n for n in self._meta_data.Condition_Name.unique() if "blank" not in n]
        except (AttributeError, TypeError) as err:
            # AttributeError: column is missing; TypeError: non-string entries (e.g. NaN)
            self.names_of_conditions = []
            names_error = err

        if args.cond:
            self.number_of_conditions = int(args.cond)
        elif names_error is None:
            logger.debug("Setting the number of conditions automatically from entries found in metadata...")
            self.number_of_conditions = len(self.names_of_conditions)
            logger.debug("I set the number of conditions to {}.".format(self.number_of_conditions))
        else:
            error_message = "Please specify the number of conditions or have them in the metadata!"
            logger.critical(error_message)
            raise Exception(error_message) from names_error

        if args.number_of_replicates:
            # The option arrives as a string and has to be stored, otherwise it is silently ignored
            self.number_of_replicates = int(args.number_of_replicates)
        else:
            logger.debug("Setting the number of replicates automatically from entries found in metadata...")
            self.number_of_replicates = len(self._meta_data.Replicate.unique())
            logger.debug("I set the number of replicates to {}.".format(self.number_of_replicates))

        ## check meta data for plausibility
        # Every Unique_Sample_ID must occur in exactly one row; offending rows are only reported.
        for sample in self._meta_data.Unique_Sample_ID.unique():
            test_results = self._meta_data[self._meta_data["Unique_Sample_ID"] == sample]
            if len(test_results) != 1:
                logger.critical("Your metadata is inplausible, the Sample_IDs are NOT unique!")
                logger.critical("Please check these entries:")
                logger.critical(test_results)


    def read_data(self):
        """Read data from csv files and store them as dataframes in a list."""

        logger = logging.getLogger(__name__)
        logger.info("Reading files...")

        # Get all tab separated files00
        all_files = self._meta_data.Data_File.unique()

        for progress_indexer, file in enumerate(all_files):
            #self.file_list.append(file)

            logger.info(f"Reading file {progress_indexer+1} of a total of {len(all_files)} files.")
            logger.info(f"Reading all data from file '{file}'")

            tmp = pd.read_csv(file, delimiter=r"\s+")

            # Replace Overflow values with NaN
            tmp = tmp.replace("OVRFLW", np.nan)

            if np.any(tmp.isna()):
                logger.debug(
                    f"The data frame contains {tmp.isna().sum().sum()} Overflow values, which were replaced by NaN.")

            # Set Wavelength as index
            tmp = tmp.set_index("Wavelength")

            # Workaround where some numbers are not recognized as float
            tmp = tmp.astype(float)

            ## rename columns from Well IDs (e.g. A1, A2, ..., B9, B10, ... H11, H12) to Unique_Sample_ID
            rename_rule_dict = dict()
            all_entries_to_map = self._meta_data.query(f"Data_File == '{file}'")

            for _, i in all_entries_to_map.iterrows():
                map_from = i.Well_Number
                map_to = i.Unique_Sample_ID
                rule = { map_from : map_to }
                rename_rule_dict.update( rule )

            logger.debug("I will map with the following rule set:")
            logger.debug(rename_rule_dict)
            tmp = tmp.rename(mapper = rename_rule_dict, axis="columns")
            logger.debug("Renaming finished. Find the result below:")
            logger.debug(tmp)

            self.df_list.append(tmp)

        self.df_complete = pd.concat(self.df_list)

        logger.info("Done reading files!")


    def sub_background(self):
        """
        Subtracting the background from the measured data if blank is given.

        :return:
        """
        logger = logging.getLogger(__name__)

        logger.info("Subtracting background signal from samples...")

        # Get all tab separated files
        all_blanks = self._meta_data.Blank_Unique_Sample_ID.unique()

        for name_of_current_blank in all_blanks:
            which_samples_to_blank = self._meta_data.query(f"Blank_Unique_Sample_ID == '{name_of_current_blank}'").Unique_Sample_ID
            subtract_df = self.df_complete.copy()
            subtract_df[:] = 0
            for current_sample in which_samples_to_blank:
                subtract_df[current_sample] = self.df_complete[name_of_current_blank]
            self.df_complete = self.df_complete.sub(subtract_df)

        # Drop blank columns
        self.df_complete = self.df_complete.drop(columns=[_ for _ in self.df_complete.columns if "blank" in _])

        logger.info("Done subtracting background signal!")



    def pathlength_correction(self):
        """
        Correct the pathlength of the sample based on Abs. at 900/977 nm.

        :return:
        """
        logger = logging.getLogger(__name__)

        logger.info("Reading files for pathlength correction...")

        # Get all tab separated files
        all_files = self._meta_data.Data_File.unique()

        pathlength_df_list = []
        for progress_indexer, file in enumerate(all_files):
            prefix, suffix = file.split("Read ")
            cutoff_left = prefix.rfind("/") +1
            prefix = prefix[cutoff_left:]

            all_977_files = glob.glob("./Pathlength_corr/*977.tsv")
            identified_correct_977 = [_ for _ in all_977_files if prefix in _]

            if len(identified_correct_977) != 1:
                logger.critical("Couldn't identify correct 977 file for path length correction. It is about this entry:")
                logger.critical(file)
                sys.exit(2)

            all_900_files = glob.glob("./Pathlength_corr/*900.tsv")
            identified_correct_900 = [_ for _ in all_900_files if prefix in _]

            if len(identified_correct_900) != 1:
                logger.critical("Couldn't identify correct 900 file for path length correction. It is about this entry:")
                logger.critical(file)
                sys.exit(2)

            f977 = pd.read_csv(identified_correct_977[0], delimiter="\t")
            f900 = pd.read_csv(identified_correct_900[0], delimiter="\t")


            # Subtract Abs_900 from Abs_977 and divide by 0.179 = deltaAbs H2O
            # this yields the pathlength in cm
            pathlength_df = f977.copy()
            pathlength_df["Mean"] = ( f977.Mean - f900.Mean ) / 0.179


            ## rename columns from Well IDs (e.g. A1, A2, ..., B9, B10, ... H11, H12) to Unique_Sample_ID
            rename_rule_dict = dict()
            all_entries_to_map = self._meta_data.query(f"Data_File == '{file}'")

            for _, i in all_entries_to_map.iterrows():
                map_from = i.Well_Number
                map_to = i.Unique_Sample_ID
                rule = { map_from : map_to }
                rename_rule_dict.update( rule )

            #logger.debug("I will map with the following rule set:")
            #logger.debug(rename_rule_dict)
            pathlength_df = pathlength_df.rename(mapper = rename_rule_dict, axis="columns")
            #logger.debug("Renaming finished. Find the result below:")
            #logger.debug(pathlength_df)

            pathlength_df_list.append(pathlength_df)

        full_pathlength_df = pd.concat(pathlength_df_list)
        full_pathlength_df = full_pathlength_df[["Name", "Mean"]]
        full_pathlength_df = full_pathlength_df.rename(mapper={"Name" : "Unique_Sample_ID"}, axis="columns")

        logger.info("Performing pathlength correction...")
        #print(full_pathlength_df)

        for current_column in self.df_complete.columns:
            m = full_pathlength_df.query(f"Unique_Sample_ID == '{current_column}'").Mean
            if len(m) != 1:
                logger.critical(f"Did not find a pathlength equivalent to {current_column}.")
            else:
                self.df_complete[current_column] = self.df_complete[current_column] / float(m)

        logger.info("... done.")

        full_pathlength_df.to_csv("./csv_files/00_sample_pathlengths.csv")

        # Save pathlength of the samples as csv

        # Print a warning when pathlength differs too much from median
        if np.any(abs(full_pathlength_df.Mean.values - full_pathlength_df.Mean.median(axis=0)) > 0.15):
            logger.warning(
                f"Warning: The following samples differed strongly (> 0.15) in filling height from the "
                f"others: {np.nonzero(abs(full_pathlength_df.Mean.values - full_pathlength_df.Mean.median(axis=0)) > 0.15)}")



        ## plotting, if wished for...
        if not self._no_plot:
            df_list_complete = pd.read_csv("./csv_files/00_sample_pathlengths.csv")
            median = df_list_complete.Mean.median(axis=0)
            logger.info("Performing plotting of pathlength...")
            df_list_complete.Mean.plot(figsize=(9.5 * 0.7, 4.46 * 0.7), style="o")
            plt.xlabel("Sample No.")
            plt.ylabel("Pathlength [cm]")
            plt.title("Pathlength of the different samples", fontsize=10, fontweight="bold")
            if not min(df_list_complete.Mean) < 0.7:
                plt.ylim(median - 0.2, median + 0.2)
            else:
                plt.ylim(min(df_list_complete) - 0.1, 1)
            plt.axhline(y=median, color="black", linestyle="dashed")
            plt.axhline(y=median + 0.15, color="black", linestyle="dotted", linewidth=1)
            plt.axhline(y=median - 0.15, color="black", linestyle="dotted", linewidth=1)
            plt.tight_layout()
            plt.savefig("./Graphs/00_Pathlength" + self.graph_file_ext, dpi=800)
            plt.close("all")


    def plot_data(self):
        """
        Plot all stored data frames and save them as PNG.

        :return:
        """
        logger = logging.getLogger(__name__)

        if self._plot_raw_files and not self._no_plot:
            logger.info("Plotting raw files...")

            # Find highest Abs for y axis
            max_abs = []
            for df in self.df_list:
                max_abs.append(df.max().max())

            highest_abs = float(round(max(max_abs), 1))

            for no, df in enumerate(self.df_list):
                df.iloc[-61:, :].plot(figsize=(9.5 * 0.7, 4.46 * 0.7), style=".")
                plt.title("Spectrum of file " + self.file_list[no][:-4], fontsize=10, fontweight="bold")
                plt.ylim((-0.025, highest_abs + 0.1))
                plt.xlabel("Wavelength [nm]")
                plt.ylabel("Abs.")
                plt.tight_layout()
                plt.savefig("./Graphs/" + self.file_list[no][:-4] + self.graph_file_ext, dpi=800)
                plt.close("all")

            logger.info("Done plotting raw files!")

    def rearrange_data(self):
        """
        Rearrange dataframes for better handling.

        :return:
        """
        logger = logging.getLogger(__name__)

        logger.info("Start rearranging dataframes...")

        df_collector = []
        # Loop over samples in meta data, create tmp data frame, fill with data from meta data and append to df rearr
        for sample in self._meta_data["Unique_Sample_ID"]:

            # Do not include the blanks in the rearranged data frame.
            # If "blank" is in the sample ID, we will skip to the next entry.
            if "blank" in sample.lower():
                continue

            current_row_vals = self._meta_data[self._meta_data["Unique_Sample_ID"] == sample]

            # Create tmp data frame
            number_of_wavelength_entries = len(np.arange(float(current_row_vals["Wavelength_Start"]), float(current_row_vals["Wavelength_End"]) + 1., 1))
            tmp_df = pd.DataFrame(index=np.arange(0, number_of_wavelength_entries, 1), columns=self._meta_data.columns)

            # Fill tmp data frame
            tmp_df["Unique_Sample_ID"] = sample

            tmp_df["Time_Point"] = current_row_vals["Time_Point"].values[0]
            tmp_df["Is_neg_ctrl"] = current_row_vals["Is_neg_ctrl"].values[0]
            tmp_df["Replicate"] = current_row_vals["Replicate"].values[0]
            tmp_df["Condition_Name"] = current_row_vals["Condition_Name"].values[0]
            tmp_df["Description"] =  current_row_vals["Description"].values[0]

            tmp_df["Wavelength"] = np.arange(float(current_row_vals["Wavelength_Start"]), float(current_row_vals["Wavelength_End"]) + 1., 1)

            #print(self.df_complete) #[tmp_df["Wavelength"].tolist()])
            tmp_df["Abs"] =  self.df_complete.loc[tmp_df["Wavelength"].tolist(), sample].values

            #logger.debug(f"Current tmp data_frame: \n{tmp_df}")

            df_collector.append(tmp_df)

        ## join all collected dataframes into one
        self.df_rearr = pd.concat(df_collector, ignore_index=True)
        #print(self.df_rearr)

        # Save rearranged data in tab-separated csv
        self.df_rearr.to_csv("./csv_files/01_rearranged_data.csv")

        logger.info("Done rearranging dataframes!")

    def query_wavelength(self, wavelength):
        """
        Returns the Absorption of a queried wavelength for all stored data.

        :param wavelength: Desired wavelength.
        :return: Abs. at given wavelength.
        """
        logger = logging.getLogger(__name__)

        abs_at_wavelegnth = self.df_rearr.loc[self.df_rearr["Wavelength"] == wavelength]

        logger.debug(abs_at_wavelegnth)

        return abs_at_wavelegnth

    def group_data(self, groupno=None):
        """
        Groups replicate experiments together.

        :param groupno: Define how many columns should be grouped.
        :return:
        """
        logger = logging.getLogger(__name__)

        if not groupno and self.number_of_replicates > 0:
            groupno = self.number_of_replicates

        logger.info(
            f"Start grouping the {groupno} replicate experiment(s) for {self.number_of_conditions} different "
            f"conditions.")

        if self.number_of_replicates > 1:

            for timepoint in self.timepoints:
                for i, name_of_condition in enumerate(self.names_of_conditions):
                    tmp_df_to_append = pd.DataFrame(columns=self.df_mean.columns.tolist())

                    # Define query command
                    query = f"Condition_Name == '{name_of_condition}' & Time_Point == {timepoint}"

                    #logger.debug("I am considering the following entries for grouping:")
                    #logger.debug(self.df_rearr.query(query))

                    # Query wavelength and same range in index no
                    wavelength_range = np.unique(self.df_rearr.query(query).loc[:, "Wavelength"])
                    wavelength_len = len(wavelength_range)

                    # First query all columns, which store identical data in every replicate
                    tmp_df_to_append = tmp_df_to_append.append(self.df_rearr.query(query).loc[:,
                                                               ["Time_Point", "Wavelength", "Is_neg_ctrl", "Condition_Name",
                                                                "Description"]].iloc[:wavelength_len, :], sort=False,
                                                               ignore_index=True)

                    # Join sample names and put them to sample column
                    tmp_df_to_append.loc[:, "Unique_Sample_ID"] = "_".join(
                        np.unique(self.df_rearr.query(query).loc[:, "Unique_Sample_ID"].values))

                    # Add mean of Abs of replicates

                    tmp_list_means = []
                    tmp_list_std = []
                    for wavelength in self.df_rearr.query(query).Wavelength.unique():
                        all_entries_for_current_wavelength = self.df_rearr.query(query).query(f"Wavelength == '{wavelength}'")
                        #logger.debug("Considering these entries for this wavelength averaging:")
                        #logger.debug(all_entries_for_current_wavelength)
                        averaged_absorbance_for_current_wavelength = all_entries_for_current_wavelength.Abs.mean(skipna=True)
                        tmp_list_means.append( averaged_absorbance_for_current_wavelength )

                        standarddeviation_absorbance_for_current_wavelength = all_entries_for_current_wavelength.Abs.std(skipna=True)

                        if np.isnan(standarddeviation_absorbance_for_current_wavelength):
                            standarddeviation_absorbance_for_current_wavelength = 0
                        tmp_list_std.append( standarddeviation_absorbance_for_current_wavelength)



                    # Append tmp_df to df_mean
                    tmp_df_to_append.loc[:, "Abs"] = tmp_list_means
                    self.df_mean = self.df_mean.append(tmp_df_to_append, sort=False, ignore_index=True)

                    # Do the same for df_std
                    # First change Abs from mean to std
                    tmp_df_to_append.loc[:, "Abs"] = tmp_list_std
                    self.df_std = self.df_std.append(tmp_df_to_append, sort=False, ignore_index=True)

        else:
            self.df_mean = self.df_rearr.copy()
            self.df_std  = self.df_rearr.copy()
            self.df_std["Abs"] = 0


        # Store mean and std as csv files
        self.df_mean.to_csv("./csv_files/03_df_mean.csv")
        self.df_std.to_csv("./csv_files/04_df_std.csv")

    def plot_all_timepoints(self):
        """
        Plot data to every time point.

        :return:
        """
        logger = logging.getLogger(__name__)

        # Plot the samples at every time point for every experimental condition
        if not self._no_plot:
            logger.info("Start plotting...")

            for cond, condition_name in enumerate(self.names_of_conditions):
                logger.debug(f"I am plotting now condition {condition_name}")
                plt.figure(figsize=(9.5 * 0.7, 4.46 * 0.7))

                for timepoint in self.timepoints:
                    query = f"Condition_Name == '{condition_name}' & Time_Point == {timepoint} & Wavelength >= 260 & " \
                            f"Wavelength <= 320"

                    df_mean_queried = self.df_mean.query(query)
                    if len(df_mean_queried) == 0:
                        pass
                    else:
                        logger.debug(f"... plotting timepoint {timepoint}")
                        plt.errorbar(df_mean_queried["Wavelength"], df_mean_queried["Abs"],
                                     1.96 * self.df_std.query(query)["Abs"], fmt=".", label=f"t = {timepoint}")



                plt.ylim((-0.025, 0.7))
                plt.xlabel("Wavelength [nm]")
                plt.ylabel("Abs.")
                plt.title(f"Spectrum of condition '{condition_name}'", fontsize=10, fontweight="bold")
                plt.legend(loc="best")
                plt.tight_layout()
                plt.savefig(f"./Graphs/_cond_{cond + 1}" + self.graph_file_ext, dpi=800)
                plt.close("all")

    def standard_preparation(self):
        """
        Calculates standard absorptions for substrate and product and stores them in csv files.

        The first data frame of ``self.df_list`` is read by column position: columns 0-2 are treated as the three
        substrate standards, columns 3-5 as the three product standards, and the column six positions further right
        as the duplicate of each standard. Duplicates are averaged, every standard is scaled to the middle substrate
        concentration and the three scaled spectra of each compound are averaged into one reference spectrum.

        Both spectra are written tab separated with the columns "Wavelength" and "Abs", which is the layout
        ``conversion_rate`` expects when it loads external standards from the same paths.

        Reads ``self.df_list``, ``self.subs_conc`` and ``self.prod_conc`` (three concentrations each, array or list).

        :return:
        """
        logger = logging.getLogger(__name__)

        logger.info("Starting preparation of standards...")

        readings = self.df_list[0]

        # Accept plain lists as well as arrays; the element-wise division below needs arrays
        subs_conc = np.asarray(self.subs_conc, dtype=float)
        prod_conc = np.asarray(self.prod_conc, dtype=float)

        # Normalize to middle concentration
        # Get concentration of the middle concentrated substrate
        conc_mid = subs_conc[np.argsort(subs_conc)[len(subs_conc) // 2]]

        def reference_spectrum(first_column, concentrations):
            """Average the duplicates of three standards, scale them to conc_mid and return their mean spectrum."""
            factors = conc_mid / concentrations
            scaled = [
                readings.iloc[:, [first_column + offset, first_column + offset + 6]].mean(axis=1) * factors[offset]
                for offset in range(3)
            ]
            return pd.concat(scaled, axis=1).mean(axis=1)

        def shrink(spectrum):
            """Cut spectra with more than 61 points down to the index labels 260 to 320."""
            if len(spectrum) > 61:
                logger.debug("Spectrum too large, now shrinking it.")
                return spectrum.loc[260:320]
            return spectrum

        subs = shrink(reference_spectrum(0, subs_conc))
        prod = shrink(reference_spectrum(3, prod_conc))

        # Save as csv
        # NOTE(review): the index is labelled "Wavelength" because it is sliced with the labels 260:320 above;
        # confirm that the index of df_list[0] really holds wavelengths.
        subs.to_frame(name="Abs").to_csv("./csv_files/05_substrate_abs.csv", sep="\t", index_label="Wavelength")
        prod.to_frame(name="Abs").to_csv("./csv_files/06_product_abs.csv", sep="\t", index_label="Wavelength")

        logger.info("Done preparing standards.")

    def conversion_rate(self):
        """
        Calculates the conversion rates, at every time point.

        The grouped mean and standard deviation tables are re-read from "./csv_files/03_df_mean.csv" and
        "./csv_files/04_df_std.csv". For every condition and each of its time points the mean spectrum between
        260 nm and 320 nm is fitted with ``scipy.optimize.curve_fit`` as
        ``(a * substrate + (1 - a) * product) * b``, weighted by the standard deviations.

        The reference spectra come from "./csv_files/05_substrate_abs.csv" and "./csv_files/06_product_abs.csv"
        when ``self._standards`` is set; otherwise ``self._dT_short`` and ``self._T_short`` are used and written to
        those two files. One row per fit (condition name, time point, a, b, variance of a, variance of b,
        covariance) is added to ``self.conv_rates``, which is finally stored in "./csv_files/08_conv_rates.csv".

        :return:
        """
        logger = logging.getLogger(__name__)

        logger.info("Reading df_mean and df_std ...")
        self.df_mean = pd.read_csv("./csv_files/03_df_mean.csv")
        self.df_std = pd.read_csv("./csv_files/04_df_std.csv")
        logger.info("... done.")

        logger.info("Starting calculation of the conversion rate...")

        if self._standards:
            # Load files with standard Abs. and convert to series
            logger.debug("Will use external standards for calculation.")

            def read_standard(path):
                """Load a tab separated standard spectrum and return its "Abs" column indexed by "Wavelength"."""
                table = pd.read_csv(path, delimiter="\t").set_index("Wavelength")
                # Select by label: positional ``iloc`` indexing does not accept a column name
                return table["Abs"]

            self.subs = read_standard("./csv_files/05_substrate_abs.csv")
            self.prod = read_standard("./csv_files/06_product_abs.csv")

            subs = self.subs
            prod = self.prod

        else:
            logger.debug("Will use internal standards for calculation")

            subs = self._dT_short
            prod = self._T_short

            # Store the internal standards in the layout that the external-standards branch reads
            for spectrum, path in ((subs, "./csv_files/05_substrate_abs.csv"),
                                   (prod, "./csv_files/06_product_abs.csv")):
                table = spectrum.to_frame()
                table["Wavelength"] = table.index
                table["Abs"] = spectrum
                table.to_csv(path, sep="\t")

        def fitting_eq(x, a, b):
            """
            Calculates combination of a Mol Substrate and 1-a Mol Product and a scaling factor b.

            :param x: Unused, necessary for curve_fit function
            :param a: Amount [mol] Substrate
            :param b: Scaling factor
            :return: Array, combination of substrate and product
            """

            return (a * subs.values + (1 - a) * prod.values) * b

        # Perform curve fitting and plot results
        if not self._no_plot:
            logger.info("Performing curve fitting and plotting...")
        else:
            logger.info("Performing curve fitting...")

        # Loop invariants: wavelength axis of the fit window and the replacement for zero standard deviations
        wavelengths = np.arange(260, 321, 1)
        smallest_sigma = np.finfo(float).eps
        rate_columns = self.conv_rates.columns.tolist()
        fitted_rows = []

        for condition, condition_name in enumerate(self.names_of_conditions):
            logger.debug(f"Fitting condition '{condition_name}'...")
            logger.debug("I am extracting timepoints for this condition. These are:")
            entry_for_condition = self._meta_data.query(f"Condition_Name == '{condition_name}'")
            timepoints_for_this_condition = entry_for_condition.Time_Point.unique()
            logger.debug(timepoints_for_this_condition)

            for timepoint in timepoints_for_this_condition:
                logger.debug(f"... fitting at timepoint {timepoint}...")
                window_query = (
                    f"Condition_Name == '{condition_name}' & Time_Point == {timepoint} & Wavelength >= 260 & Wavelength "
                    f"<= 320")
                curve = np.array(self.df_mean.query(window_query)["Abs"].values, dtype="float")
                standard_deviations = np.array(self.df_std.query(window_query)["Abs"].values, dtype="float")

                # A sigma of exactly zero would lead to a division by zero in the weighted fit
                standard_deviations = np.where(standard_deviations != 0, standard_deviations, smallest_sigma)

                logger.debug(curve)
                popt, pcov = optimize.curve_fit(f=fitting_eq, xdata=wavelengths, ydata=curve,
                                                sigma=standard_deviations)
                logger.debug(f"Fitted values are:\npopt = {popt} and pcov = {pcov}")

                variances = np.diag(pcov)

                if not self._no_plot:
                    combination = (popt[0] * subs + (1 - popt[0]) * prod) * popt[1]
                    combination = combination.rename("Fitted curve")

                    plt.figure(figsize=(9.5 * 0.7, 4.46 * 0.7))
                    plt.errorbar(wavelengths, curve, standard_deviations, fmt=".")
                    # NOTE(review): the error bar is sqrt(variance of a * fitted absorbance); confirm that this is
                    # the intended uncertainty of the fitted curve.
                    plt.errorbar(combination.index, combination, 1.96 * np.sqrt(variances[0] * combination))
                    plt.ylim((-0.025, 0.7))
                    plt.xlabel("Wavelength [nm]")
                    plt.ylabel("Abs.")
                    plt.title(
                        f"Fit of condition '{condition_name}' and time point t{timepoint}\nwith parameters "
                        f"popt = {popt} and pcov = {pcov}", fontsize=10, fontweight="bold")
                    plt.legend(
                        [f"+Enz, condition {condition + 1}, t{timepoint}", "Fitted curve"],
                        loc="best")
                    plt.tight_layout()
                    plt.savefig(
                        f"./Graphs/fit_cond_{condition + 1}_t_{timepoint}" + self.graph_file_ext,
                        dpi=800)
                    plt.close("all")

                # Store conversion rates
                row = [condition_name, timepoint, popt[0], popt[1], variances[0], variances[1], pcov[0][1]]
                logger.debug(f"Current row to append: {row}")
                fitted_rows.append(row)

        # Add all rows at once; DataFrame.append no longer exists in pandas 2 and copied the frame on every call
        if fitted_rows:
            new_rates = pd.DataFrame(fitted_rows, columns=rate_columns)
            if self.conv_rates.empty:
                self.conv_rates = new_rates
            else:
                self.conv_rates = pd.concat([self.conv_rates, new_rates], ignore_index=True)

        if not self._no_plot:
            logger.info("Done with curve fitting and plotting!")
        else:
            logger.info("Done with curve fitting!")

        plt.close("all")

        # Store conversion rates and covariances
        self.conv_rates.to_csv("./csv_files/08_conv_rates.csv")

    def plot_conversion_rates(self):
        """
        Plots the conversion rates with 95% CI at different time points.

        The fit results are re-read from "./csv_files/08_conv_rates.csv". Unless plotting is disabled, one graph per
        condition is saved showing ``100 * (1 - Par_a)`` with an error bar of ``1.96 * sqrt(|Variance_a|)`` for each
        time point. Independently of plotting, a table with substrate and product concentrations per time point is
        appended to ``self.concs`` for every condition and written to
        "./csv_files/07_concentrations_condition_<n>.csv".

        :return:
        """
        logger = logging.getLogger(__name__)

        logger.info("Reading the stored conversion rates ...")
        self.conv_rates = pd.read_csv("./csv_files/08_conv_rates.csv")
        logger.info("... Done.")

        logger.info("Starting plotting of conversion rates...")

        def timepoints_of(condition_name):
            """Return the unique time points listed in the meta data for one condition."""
            entry_for_condition = self._meta_data.query(f"Condition_Name == '{condition_name}'")
            return entry_for_condition.Time_Point.unique()

        def fit_result(condition_name, timepoint):
            """Return Par_a and the half width 1.96 * sqrt(|Variance_a|) of one fit, looked up only once."""
            row = self.conv_rates.query(f"Condition_Name == '{condition_name}' & Time_Point == {timepoint}")
            par_a = row["Par_a"].values[0]
            half_width = 1.96 * np.sqrt(abs(row["Variance_a"].values[0]))
            return par_a, half_width

        # Plot conversion rates with 95% CI at different time points
        if not self._no_plot:
            for index, condition_name in enumerate(self.names_of_conditions):
                plt.figure(figsize=(9.5 * 0.7, 4.46 * 0.7))

                logger.debug(f"... for condition '{condition_name}'...")
                logger.debug("I am extracting timepoints for this condition. These are:")
                timepoints_for_this_condition = timepoints_of(condition_name)
                logger.debug(timepoints_for_this_condition)

                for timepoint in timepoints_for_this_condition:
                    par_a, half_width = fit_result(condition_name, timepoint)
                    plt.errorbar(timepoint, 100 * (1 - par_a), half_width, marker="o", color="#348abdff")

                plt.ylim(0, 100)
                plt.xlabel("Time [min]")
                plt.ylabel("Conversion rate [%]")
                plt.title("Conversion rate over time", fontsize=10, fontweight="bold")
                plt.tight_layout()
                plt.savefig(f"./Graphs/01_Conversion_rates_cond_{index + 1}" + self.graph_file_ext, dpi=800)
                plt.close("all")

        # Store concentrations of substrates
        for index, condition_name in enumerate(self.names_of_conditions):
            timepoints_for_this_condition = timepoints_of(condition_name)

            # Keep a direct reference to the new table: its position in self.concs only equals `index` when the
            # list was empty before this method was called.
            table = pd.DataFrame(columns=["Condition_Name", "Substrate", "Product", "Conversion_Var"],
                                 index=timepoints_for_this_condition)
            self.concs.append(table)

            # Change name of index
            table.index.name = "Time_Point"

            # Store Data in Data Frame
            for timepoint in timepoints_for_this_condition:
                par_a, half_width = fit_result(condition_name, timepoint)

                table.loc[timepoint, "Condition_Name"] = condition_name
                table.loc[timepoint, "Substrate"] = self.initial_concs[index] * par_a
                table.loc[timepoint, "Product"] = self.initial_concs[index] * (1 - par_a)
                table.loc[timepoint, "Conversion_Var"] = half_width

            # Store as csv
            table.to_csv(f"./csv_files/07_concentrations_condition_{index + 1}.csv")

        logger.info("Done!")


def main():
    """
    Run the complete evaluation.

    Logs the script identity, parses the arguments, reads the data and applies background subtraction and path
    length correction. If ``data.prep`` is set only the standards are prepared; otherwise the data are plotted,
    rearranged, grouped and the conversion rates are fitted and plotted.
    """
    self_log()
    data = Data()

    # Steps shared by both modes
    common_steps = (
        data.parse_args,
        data.read_data,
        data.sub_background,
        data.pathlength_correction,
    )
    for step in common_steps:
        step()

    # Standards mode stops after writing the reference spectra
    if data.prep:
        data.standard_preparation()
        return

    evaluation_steps = (
        data.plot_data,
        data.rearrange_data,
        data.group_data,
        data.plot_all_timepoints,
        data.conversion_rate,
        data.plot_conversion_rates,
    )
    for step in evaluation_steps:
        step()

    plt.close("all")


# Run the evaluation only when this file is executed as a script, not when it is imported
if __name__ == "__main__":
    main()
