import Bio.Seq as Seq
import numpy as np
import collections.abc as abc
import screed
import logging
import os

from typing import Iterable, TypeVar, BinaryIO, Callable, Optional, Generator, Dict
from numba import jit

logger = logging.getLogger(__name__)

# Standard offset for quality scores in FastQ
PHRED_33 = 33

# Type hints
CharType = TypeVar('CharType', str, bytes)
SeqType = TypeVar('SeqType', str, bytes, np.ndarray)
QualType = TypeVar('QualType', str, bytes, Iterable[int], np.ndarray)

# Translation tables used in complementation.
# Case handling: a/c/g/t/u of either case map to upper-case T/G/C/A/A, whereas the
# IUPAC ambiguity codes (and n itself) map to an N of the same case as the input.
# Every byte not listed maps to itself.
COMPLEMENT_TABLE_BYTES = bytes.maketrans(b'acgtumrwsykvhdbnACGTUMRWSYKVHDBN', b'TGCAAnnnnnnnnnnnTGCAANNNNNNNNNNN')
# The same 256-entry table as a uint8 array, so it can be indexed by byte value
COMPLEMENT_TABLE_BYTES_NUMPY = np.asarray(memoryview(COMPLEMENT_TABLE_BYTES), dtype=np.uint8)

# For string records emitted from IndexedReader: IUPAC ambiguity codes become
# an N of matching case
AMBIGUOUS_CONVERSION_TABLE = str.maketrans('mrwsykvhdbMRWSYKVHDB', 'nnnnnnnnnnNNNNNNNNNN')


def ambiguous_to_n(name: str, seq: str) -> str:
    """
    Replace every IUPAC ambiguity code in a sequence with N, keeping the case of
    the replaced character (lower-case codes become n, upper-case codes become N).
    :param name: identifier of the sequence, used only in the debug log message
    :param seq: the nucleotide string to convert
    :return: the converted sequence as a str
    """
    converted = seq.translate(AMBIGUOUS_CONVERSION_TABLE)
    was_changed = converted != seq
    if was_changed:
        # only mention the record when at least one character was replaced
        logger.debug('Translated ambiguous nucleotides to N in {}'.format(name))
    return converted


@jit(nopython=True)
def complement_array(s: np.ndarray) -> np.ndarray:
    """
    Complement (WITHOUT reversing) a numpy array holding a byte representation of a
    DNA sequence. Each byte is replaced by its entry in COMPLEMENT_TABLE_BYTES_NUMPY.
    Callers wanting the reverse complement must reverse the result themselves.
    :param s: the ndarray of byte values to complement
    :return: a new uint8 array of the same length, complemented, in the input order
    """
    n = len(s)
    # fill a preallocated output rather than building an intermediate Python list
    out = np.empty(n, dtype=np.uint8)
    for i in range(n):
        out[i] = COMPLEMENT_TABLE_BYTES_NUMPY[s[i]]
    return out


class Sequence(object):
    """
    A named nucleotide sequence with optional per-base quality scores.

    The sequence (and the qualities, when present) are held as numpy uint8 arrays,
    while name and description are encoded to bytes when given as str. The instance
    attribute ``write`` is bound at construction to ``write_fastq`` when qualities
    were supplied and to ``write_fasta`` otherwise.
    """

    @staticmethod
    def __encode(v: SeqType) -> bytes:
        """
        Convert strings to byte representation
        :param v: a potential string
        :return: the encoded bytes for a str, otherwise v unchanged
        """
        return v.encode() if isinstance(v, str) else v

    def __str__(self) -> str:
        """
        :return: a string representation of the DNA sequence
        """
        return self.seq.tobytes().decode()

    def __len__(self) -> int:
        """
        :return: the nucleotide length of the sequence
        """
        return self.seq.shape[0]

    def _joined_qual(self, other: 'Sequence') -> Optional[np.ndarray]:
        """
        Concatenate the quality scores of this sequence with those of another.
        :param other: the sequence being appended
        :return: the combined qualities, or None when this sequence carries no
                 qualities (any qualities on other are then dropped)
        :raises ValueError: if this sequence has qualities but other has none
        """
        if self.qual is None:
            return None
        if other.qual is None:
            raise ValueError('Cannot append a sequence without quality scores to one with quality scores')
        return np.concatenate((self.qual, other.qual))

    def __iadd__(self, other) -> 'Sequence':
        """
        Append a new sequence to the end of this instance, in place.
        :param other: the sequence to append
        :return: this instance, now holding the combined sequence
        :raises TypeError: if other is not a Sequence
        :raises ValueError: if this instance has qualities but other has none
        """
        if not isinstance(other, Sequence):
            raise TypeError(f'Incompatible types in add {type(self)} and {type(other)}')
        # resolve the qualities first so a failure leaves this instance untouched
        _qual = self._joined_qual(other)
        self.seq = np.concatenate((self.seq, other.seq))
        self.qual = _qual
        return self

    def __add__(self, other) -> 'Sequence':
        """
        Join two sequences in a similar fashion to lists. The name and description
        of the left-hand sequence are kept.
        :param other: another sequence
        :return: the combined result as a new Sequence
        :raises TypeError: if other is not a Sequence
        :raises ValueError: if this instance has qualities but other has none
        """
        if not isinstance(other, Sequence):
            raise TypeError(f'Incompatible types in add {type(self)} and {type(other)}')
        return Sequence(self.name,
                        np.concatenate((self.seq, other.seq)),
                        description=self.description,
                        qual=self._joined_qual(other))

    def __invert__(self) -> 'Sequence':
        """
        Using the ~ operator as a reverse-complement operator

        :return: the reverse complement of the sequence as a new Sequence
        """
        return self.revcomp()

    def __getitem__(self, item):
        """
        Slicing and indexing support of the sequence. An integer index yields a
        Sequence of length one.
        :param item: an index or slice object
        :return: a new Sequence for the selection. numpy basic slicing returns views,
                 so the result may share memory with this instance; use copy() when
                 independent data is needed.
        :raises IndexError: if an integer index is out of range
        """
        if isinstance(item, (int, np.integer)):
            # a bare integer would yield a numpy scalar, which cannot form a Sequence,
            # so turn it into the equivalent single-element slice
            item = int(item)
            n = len(self)
            if not -n <= item < n:
                raise IndexError(f'index {item} is out of bounds for sequence of length {n}')
            # item + 1 == 0 for the last element would give an empty slice
            item = slice(item, None if item == -1 else item + 1)
        _qual = None if self.qual is None else self.qual[item]
        return Sequence(self.name, self.seq[item], description=self.description, qual=_qual)

    def __init__(self, name: CharType, seq: SeqType,
                 description: CharType = None, qual: QualType = None,
                 tags: Optional[Dict[str, str]] = None):
        """
        :param name: the name of the sequence
        :param seq: the DNA sequence
        :param description: a description of the sequence
        :param qual: the optional quality scores
        :param tags: an optional dictionary of tags, stored by reference
        :raises TypeError: if qual is neither str, bytes nor an iterable
        :raises ValueError: if seq and qual differ in length
        """
        self.name = Sequence.__encode(name)
        self.tags = {} if tags is None else tags
        self.description = b'' if description is None else Sequence.__encode(description)

        if isinstance(seq, np.ndarray):
            self.seq = np.asarray(seq, dtype=np.uint8)
        else:
            self.seq = np.asarray(memoryview(Sequence.__encode(seq)), dtype=np.uint8)

        if qual is None:
            self.qual = None
            self.write = self.write_fasta
            return

        # NOTE(review): str/bytes qualities are stored as their raw byte values, yet
        # encode_qual() adds PHRED_33 on output. If callers pass PHRED+33 encoded text
        # (IndexedReader passes the record's quality string) the offset is applied
        # twice when writing -- confirm the intended representation.
        if isinstance(qual, (str, bytes)):
            self.qual = np.asarray(memoryview(Sequence.__encode(qual)), dtype=np.uint8)
        elif isinstance(qual, (np.ndarray, abc.Iterable)):
            self.qual = np.asarray(qual, dtype=np.uint8)
        else:
            raise TypeError(f'Unsupported type for qual: {type(qual)}')

        # compare the converted arrays: validation must not vanish under "python -O"
        if self.seq.shape != self.qual.shape:
            raise ValueError('seq and qual parameters must be the same length')
        self.write = self.write_fastq

    def encode_qual(self) -> bytes:
        """
        Standard FastQ offset (33) of quality scores as bytes
        :return: a bytes representation of the quality scores
        """
        return (self.qual + PHRED_33).tobytes()

    def revcomp(self) -> 'Sequence':
        """
        :return: a new reverse complemented sequence, keeping the description and
                 carrying the quality scores (when present) in reversed order
        """
        _qual = None if self.qual is None else self.qual[::-1]
        # qual must be passed by keyword: the third positional parameter is description
        return Sequence(self.name, complement_array(self.seq)[::-1],
                        description=self.description, qual=_qual)

    def copy(self) -> 'Sequence':
        """
        :return: a new Sequence with the same name and description and with copies
                 of the sequence and quality arrays
        """
        _qual = None
        if self.qual is not None:
            _qual = self.qual.copy()
        return Sequence(self.name, self.seq.copy(), description=self.description, qual=_qual)

    def to_bioseq(self):
        """
        :return: return Bio.Seq object
        """
        return Seq.Seq(str(self))

    def _header(self) -> bytes:
        """
        :return: the record header text: the name, followed by a space and the
                 description only when the description is non-empty
        """
        if self.description:
            return b'%b %b' % (self.name, self.description)
        return self.name

    def write_fastq(self, fn: BinaryIO):
        """
        Write a Sequence object to a output stream in FastQ format. This is automatically
        selected at instantiation for the method Sequence.write() if quality scores are present.

        :param fn: the output file handle
        """
        fn.write(b'@%b\n%b\n+\n%b\n' % (self._header(), self.seq.tobytes(), self.encode_qual()))

    def write_fasta(self, fn: BinaryIO):
        """
        Write a Sequence object to a output stream in FastA format. This is automatically
        selected at instantiation for the method Sequence.write() if no quality scores
        were supplied.

        :param fn: the output file handle
        """
        fn.write(b'>%b\n%b\n' % (self._header(), self.seq.tobytes()))


class IndexedReader(abc.Mapping):
    """
    Read-only mapping from sequence name to Sequence, backed by a Screed index of a
    FastA or FastQ file. Can be used as a context manager, in which case the
    underlying database is closed on exit.
    """

    def __init__(self, file_name: str, seq_filter: Optional[Callable[[str, str], str]] = None):
        """
        Indexed access a FastA or FastQ file using Screed.

        An existing index that is not newer than the sequence file is deleted and rebuilt.

        :param file_name: the FastA or FastQ file to access
        :param seq_filter: an optional filter applied to emitted sequences; it is called
                           with the record name and sequence string and returns the
                           sequence to use
        """

        db_path = f'{file_name}_screed'
        logger.debug(f'Indexed sequence database will be: {db_path}')

        if os.path.exists(db_path):
            f_stat = os.stat(file_name)
            db_stat = os.stat(db_path)
            # an index with the same or an older timestamp than its source is treated as stale
            if f_stat.st_mtime_ns >= db_stat.st_mtime_ns:
                logger.warning('Reference newer than index, removing stale index')
                os.remove(db_path)

        if not os.path.exists(db_path):
            logger.debug('Creating indexed sequence database')
            screed.make_db(file_name)
        else:
            logger.debug('Using existing indexed sequence database')

        self.seq_db = screed.ScreedDB(file_name)
        self.seq_filter = seq_filter

    def __getitem__(self, item: str) -> Sequence:
        """
        Fetch a single record from the index as a Sequence.
        :param item: the name of the record to fetch
        :return: a Sequence built from the record, after applying seq_filter if one was given
        """
        record = self.seq_db[item]

        # pull out the full sequence record always
        _seq = str(record['sequence'])
        # optionally apply a filter
        if self.seq_filter is not None:
            _seq = self.seq_filter(record['name'], _seq)

        # 'annotations' takes precedence over 'description' when both are present
        _desc = None
        if 'annotations' in record:
            _desc = record['annotations']
        elif 'description' in record:
            _desc = record['description']

        _qual = None
        if 'quality' in record:
            _qual = record['quality'].encode()

        return Sequence(record['name'], _seq, description=_desc, qual=_qual)

    def __iter__(self) -> Generator[str, None, None]:
        """
        :return: an iterator over the keys of the underlying database
        """
        return self.seq_db.iterkeys()

    def __len__(self) -> int:
        """
        :return: the length reported by the underlying database
        """
        return len(self.seq_db)

    def remove_db(self):
        """
        Delete the database file from disk, logging a warning if it does not exist.
        The open database handle is not closed by this method.
        """
        # NOTE(review): relies on a private attribute of the screed database object
        fname = self.seq_db._filepath
        if os.path.exists(fname) and os.path.isfile(fname):
            logger.debug('Removing screed database file {}'.format(fname))
            os.remove(fname)
        else:
            logger.warning('The database file {} did not exist'.format(fname))

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # always release the database, including when the with-body raised;
        # returning None lets any exception propagate
        self.seq_db.close()

def open_screed_reader(file_name: str, parse_description: bool = True) -> screed.openscreed.Open:
    """
    Open a sequence file for reading by delegating to screed.open.
    :param file_name: the path handed to screed.open
    :param parse_description: forwarded to screed.open's keyword of the same name
    :return: the reader object returned by screed.open
    """
    reader = screed.open(file_name, parse_description=parse_description)
    return reader
