"""This module contains an implementation of a hampel filter based random telegraph noise removal tool; In which a 
Hampel filter is used for segment-wise outlier detection in the sample-wise difference signal computed from 
the corrupted raw signal. Subsequently, the detected outliers are replaced using a segment-wise replacement method 
(median, random sample or polynomial regression). Once detection and replacement are completed, the cumulative sum 
of the de-noised difference signal is computed to arrive at a de-noised realisation of the noisy time series.
"""

# import section:
import numpy as np

class Hampel(object):
    """Segment-wise outlier detection combined with segment-wise outlier replacement.

    Detection and replacement are performed on the sample-wise difference signal computed from the raw data; the
    cumulative sum of the cleaned differences (offset by the first raw sample) yields the reconstruction.

    After construction the following attributes are available: ``differences`` (cleaned difference signal),
    ``reconstruction``, ``labels`` (list holding one array with ``-1`` for outliers and ``0`` for inliers),
    ``th_arr`` (thresholds per segment), ``idx_i`` / ``idx_o`` (boolean inlier / outlier masks), ``clusters``,
    ``counts`` and ``scores``.

    :param arr: raw data containing jumps. The data is flattened before differencing.
    :type arr: numpy.ndarray of shape [N] or [N x 1]
    :param th: Select threshold method - 'hard' hard coded threshold for all segments - 'med' use median values of
        segments to define thresholds - 'sig' use a MAD based scale estimate of the segment's standard deviation
    :type th: string
    :param th_val: Threshold scale factor handed to the selected threshold method; used as calibration parameter.
    :type th_val: float
    :param re: Select outlier replacement method: 'med', 'rnd', 'plf'
    :type re: string
    :param nperseg: number of samples per segment
    :type nperseg: int
    :param settings: dictionary of keyword arguments for np.polyfit (e.g. ``{'deg': 2}``); only used by 'plf'. A
        bare value is interpreted as the polynomial degree. Options that change the return value of np.polyfit
        (``full``, ``cov``) are not supported.
    :type settings: dict
    :param avoid_implosion: Use additional median filter to prevent implosion of segment based thresholds; which
        happens if a segment contains more than 50 percent of outliers.
    :type avoid_implosion: Bool
    :raises ValueError: if ``th`` or ``re`` does not name an available method.
    """

    def __init__(self, arr, th='med', th_val=1.1, re='med', nperseg=25, settings=None, avoid_implosion=True):
        """Constructor method: store the settings and run the complete reconstruction.
        """
        # preallocate attributes which are filled by reconstruction_pipe / label_indices
        self.labels = None
        self.th_arr = None
        self.idx_i = None
        self.idx_o = None

        # store settings and map the method names onto the bound methods
        self.arr = arr
        self.th_val = th_val
        self.nperseg = nperseg
        self.avoid_implosion = avoid_implosion
        self.settings = settings
        self.th = self._resolve_method('_th_', th)
        self.re = self._resolve_method('_re_', re)

        # perform the reconstruction(s); flatten first because np.diff works along the last axis and would
        # return an empty result for a column vector of shape [N x 1]
        flat = np.ravel(np.asarray(arr))
        self.differences = np.hstack([0, np.diff(flat)])
        self.differences = self.reconstruction_pipe(arr=self.differences, nperseg=nperseg)
        self.reconstruction = np.cumsum(self.differences) + flat[0]

        # clusters, counts, scores:
        self.clusters = self._clusters()
        self.counts = self._counts()
        self.scores = self._scores()

    def _resolve_method(self, prefix, name):
        """Look up the bound method called ``prefix + name`` without evaluating arbitrary code.

        :param prefix: method name prefix, '_th_' or '_re_'
        :type prefix: string
        :param name: user supplied method name, e.g. 'med'
        :type name: string
        :return: the bound method
        :raises ValueError: if no such method exists
        """
        method = getattr(self, prefix + str(name), None)
        if not callable(method):
            options = sorted(attr[len(prefix):] for attr in dir(type(self)) if attr.startswith(prefix))
            raise ValueError("unknown method %r for '%s*'; available: %s" % (name, prefix, options))
        return method

    def _polyfit_kwargs(self):
        """Translate ``self.settings`` into keyword arguments for np.polyfit (degree defaults to 1).

        :return: keyword arguments, always containing 'deg'
        :rtype: dict
        """
        if self.settings is None:
            return {'deg': 1}
        if isinstance(self.settings, dict):
            kwargs = dict(self.settings)
            kwargs.setdefault('deg', 1)
            return kwargs
        # a bare value is interpreted as the polynomial degree
        return {'deg': self.settings}

    # define segment-wise thresholds methods
    # ===================================================
    # methods to define the threshold value for outlier detection on each segment
    def _th_hard(self, seg, th=0.25):
        """Define a hard coded threshold level which is identical for all segments.

        :param seg: array of shape (# segments, nperseg)
        :type seg: numpy.ndarray
        :param th: threshold level
        :type th: float
        :return: threshold levels, one per segment, shape (# segments,)
        :rtype: numpy.ndarray
        """
        return np.full(seg.shape[0], th, dtype=float)

    def _th_med(self, seg, th=1.15):
        """Define segment threshold values as the scaled median value of each segment.

        :param seg: array of shape (# segments, nperseg)
        :type seg: numpy.ndarray
        :param th: scale factor applied to the segment medians
        :type th: float
        :return: threshold levels, one per segment, shape (# segments,). Values may be negative; the detection
            compares against their absolute value.
        :rtype: numpy.ndarray
        """
        return th * np.median(seg, axis=1)

    def _th_sig(self, seg, th=3):
        """Define segment threshold values from a MAD based scale estimate of the standard deviation.

        :param seg: array of shape (# segments, nperseg)
        :type seg: numpy.ndarray
        :param th: threshold factor applied to the scale estimate
        :type th: float
        :return: threshold levels, one per segment, shape (# segments, 1)
        :rtype: numpy.ndarray
        """
        med = np.median(seg, axis=1, keepdims=True)
        mad = np.median(np.abs(seg - med), axis=1, keepdims=True)
        # 1.4826 is the factor used here to scale the MAD into a standard deviation estimate
        return th * 1.4826 * mad

    # Median filter to prevent segments from imploding
    # ===================================================
    def _avoid_implosion(self, th, percentile_th=95, th_fact=2):
        """Replace unusually large segment thresholds by the median threshold.

        Thresholds exceeding ``th_fact`` times the ``percentile_th``-th percentile of all thresholds are replaced
        by the median of all thresholds. The input array is left untouched.

        :param th: threshold values per segment
        :type th: numpy.ndarray
        :param percentile_th: percentile bound on the threshold values
        :type percentile_th: float
        :param th_fact: scales the percentile bound to arrive at the final level used to filter the thresholds
        :type th_fact: float
        :return: filtered copy of the threshold levels (same shape as ``th``)
        :rtype: numpy.ndarray
        """
        th = np.array(th, copy=True)
        bound = th_fact * np.percentile(th, percentile_th)
        th_med = np.median(th)
        th[th > bound] = th_med
        return th

    # define replacement methods
    # ===================================================
    def _re_med(self, seg, th=None):
        """Provide the median of each segment as replacement value for its outliers.

        :param seg: array of shape (# segments, nperseg)
        :type seg: numpy.ndarray
        :param th: threshold values per segment; accepted for a uniform interface but not used
        :type th: numpy.ndarray
        :return: one replacement value per segment, shape (# segments,)
        :rtype: numpy.ndarray
        """
        return np.median(seg, axis=1)

    def _re_rnd(self, seg, th=None):
        """Provide a randomly drawn sample of each segment as replacement value for its outliers.

        If thresholds are given, the sample is drawn from the inliers of the segment (``|s| <= |th|``); a segment
        without any inlier falls back to its median. Without thresholds the sample is drawn from the whole segment.

        :param seg: array of shape (# segments, nperseg)
        :type seg: numpy.ndarray
        :param th: threshold values per segment
        :type th: numpy.ndarray
        :return: one replacement value per segment, shape (# segments, 1)
        :rtype: numpy.ndarray
        """
        if th is None:
            return np.asarray([np.random.choice(s, 1) for s in seg])

        picks = []
        for s, t in zip(seg, th):
            # use |t| so that the inlier definition matches the outlier detection in reconstruction_pipe
            pool = s[np.abs(s) <= np.abs(t)]
            if pool.size:
                picks.append(np.random.choice(pool, 1))
            else:
                picks.append(np.atleast_1d(np.median(s)))
        return np.asarray(picks)

    def _re_plf(self, seg, th):
        """Replace outliers in each segment by a polynomial regression on the inliers of that segment.

        The polynomial (degree 1 unless ``self.settings`` says otherwise) is fitted against the sample index
        within the segment. Segments without outliers are returned unchanged. If a segment holds too few inliers
        to determine the polynomial (not more than the degree), its outliers are replaced by the segment median.

        :param seg: array of shape (# segments, nperseg)
        :type seg: numpy.ndarray
        :param th: threshold values per segment
        :type th: numpy.ndarray
        :return: float copy of ``seg`` with outliers replaced, shape (# segments, nperseg)
        :rtype: numpy.ndarray
        """
        fit_kwargs = self._polyfit_kwargs()
        deg = int(fit_kwargs['deg'])
        t = np.arange(seg.shape[1])
        # float copy: keeps the input untouched and avoids truncating fitted values for integer input
        rep = np.asarray(seg).astype(float)
        for row, th_val in zip(rep, th):
            limit = np.abs(th_val)
            outliers = np.abs(row) > limit
            if not outliers.any():
                continue
            inliers = np.abs(row) <= limit
            if np.count_nonzero(inliers) > deg:
                coeffs = np.polyfit(t[inliers], row[inliers], **fit_kwargs)
                # row is a view into rep, hence this assignment updates rep in place
                row[outliers] = np.polyval(coeffs, t[outliers])
            else:
                row[outliers] = np.median(row)
        return rep

    # reshape array into array holding segments as rows
    # ===================================================
    def _seg_array(self, arr, nperseg):
        """Reshape array into row-wise segments of length nperseg.

        The input is flattened and zeros are appended to the last segment in order to obtain a rectangular array.

        :param arr: input array of arbitrary length.
        :type arr: numpy.ndarray
        :param nperseg: length of segments.
        :type nperseg: int
        :return: newly allocated array holding the segments as rows, shape (# segments, nperseg)
        :rtype: numpy.ndarray
        :raises ValueError: if nperseg is smaller than 1
        """
        nperseg = int(nperseg)
        if nperseg < 1:
            raise ValueError("nperseg must be a positive integer")
        data = np.ravel(np.asarray(arr))
        nsegs = -(-data.size // nperseg)  # ceiling division
        n_pad = nsegs * nperseg - data.size
        padded = np.concatenate((data, np.zeros(n_pad, dtype=data.dtype)))
        return padded.reshape(nsegs, nperseg)

    # Segments-wise processing of data
    # ===================================================
    def reconstruction_pipe(self, arr, nperseg, avoid_implosion=None):
        """Perform outlier detection and replacement on array.

        The array is split into segments, outliers are detected in each segment using the selected threshold
        method (a sample is an outlier if its absolute value exceeds the absolute threshold of its segment) and
        are replaced using the selected replacement method. As a side effect ``self.labels``, ``self.th_arr``,
        ``self.idx_i`` and ``self.idx_o`` are updated.

        :param arr: 1-dimensional input array (the sample wise difference signal in this application).
        :type arr: numpy.ndarray
        :param nperseg: length of segments to be used for outlier detection and replacement.
        :type nperseg: int
        :param avoid_implosion: switch for deciding whether the additional median filter should be applied on the
            derived threshold levels; ``None`` (default) uses the setting given to the constructor.
        :type avoid_implosion: Bool
        :return: reconstructed signal with the same number of samples as ``arr``
        :rtype: numpy.ndarray
        """
        if avoid_implosion is None:
            avoid_implosion = self.avoid_implosion

        # prepare data for processing:
        arr = np.asarray(arr)
        n_samples = arr.size
        seg_arr = self._seg_array(arr=arr, nperseg=nperseg)
        th_arr = self.th(seg=seg_arr, th=self.th_val)
        if avoid_implosion:
            th_arr = self._avoid_implosion(th=th_arr)
        re_arr = np.asarray(self.re(seg=seg_arr, th=th_arr))

        # one threshold per segment, broadcast along the samples of the segment
        limits = np.abs(np.asarray(th_arr)).reshape(-1, 1)
        is_outlier = np.abs(seg_arr) > limits

        # Outlier replacement; replacement methods return either one value per segment or a full segment array
        replacements = re_arr[:, np.newaxis] if re_arr.ndim == 1 else re_arr
        rec = np.where(is_outlier, replacements, seg_arr).flatten()[:n_samples]

        # setup labels from outlier detection (-1: outlier, 0: inlier); the zero pad is cut off again
        labels = np.where(is_outlier, -1, 0).flatten()[:n_samples]
        self.labels = [labels]
        self.th_arr = th_arr
        self.label_indices()

        return rec

    # some additional information which can be used for plotting
    # ===================================================
    def label_indices(self):
        """Generate boolean masks marking all inliers and all outliers respectively.

        The masks are stored in ``self.idx_i`` (inliers, label 0) and ``self.idx_o`` (outliers, label -1), one
        mask per entry of ``self.labels``. Nothing is returned.
        """
        self.idx_i = [np.asarray(lab) == 0 for lab in self.labels]
        self.idx_o = [np.asarray(lab) == -1 for lab in self.labels]

    def _clusters(self):
        """Generate lists of sample indices grouped by label.

        :return: for each entry of ``self.labels`` a list holding one index list per label that is present,
            ordered inliers (0) first and outliers (-1) second
        :rtype: list
        """
        clusters = []
        for labels in self.labels:
            labels = np.asarray(labels)
            # explicit ordering instead of relying on the iteration order of a set
            present = sorted(set(labels.tolist()), reverse=True)
            clusters.append([np.flatnonzero(labels == lab).tolist() for lab in present])
        return clusters

    def _counts(self):
        """Generate list containing the number of samples per cluster.

        :return: List of lists, containing the sizes of the clusters in ``self.clusters``
        :rtype: list
        """
        return [[len(cluster) for cluster in clusters] for clusters in self.clusters]

    def _scores(self):
        """Generate list containing the ratio between outliers and the total amount of samples.

        :return: List containing the ratio between outliers and total number of samples for each entry of
            ``self.labels``; -1 if an entry holds no samples at all
        :rtype: list
        """
        scores = []
        for labels in self.labels:
            labels = np.asarray(labels)
            if labels.size:
                scores.append(np.count_nonzero(labels == -1) / labels.size)
            else:
                scores.append(-1)
        return scores

