import openpyxl
import os
import logging
from pathlib import Path
import calendar
import concurrent.futures
import subprocess
from datetime import datetime, timedelta, date, timezone
from collections import OrderedDict
from dataclasses import dataclass

# Directory the process was started from.
cwd = os.getcwd()
# Parent of the working directory; the Excel workbook is looked up in this folder.
root_path = Path(cwd).parents[0]
# Emitted once, at import time.
print(root_path)
# Zero-based positions used to index into a worksheet row (``sheet[row][index]``).
status_cell = 2
start_date_cell = 5
end_date_cell = 6
repo_url_cell = 7


@dataclass
class CommitHashDate:
    """Pair of commit hashes delimiting a date window, together with the window bounds."""

    hash_start: str
    hash_end: str
    start_date: str
    end_date: str

    def __repr__(self) -> str:
        # One line per record; the trailing newline is part of the text.
        return (
            f"hash_start: {self.hash_start}, hash_end: {self.hash_end}, "
            f"start_date: {self.start_date}, end_date: {self.end_date}\n"
        )

    # str() and repr() render the same text.
    __str__ = __repr__


class RepoMetadata:
    """Repository name with its status and start/end dates."""

    def __init__(self, repo: str, status: str, start_date, end_date) -> None:
        self.repo, self.status = repo, status
        self.start_date, self.end_date = start_date, end_date

    def __repr__(self):
        # Status is printed last, after the two dates.
        return (
            f"repo: {self.repo} start_date: {self.start_date} "
            f"end_date: {self.end_date} status: {self.status}"
        )


class Data:
    """Repository name with its start and end dates."""

    def __init__(self, repo, start_date, end_date) -> None:
        self.repo, self.start_date, self.end_date = repo, start_date, end_date

    def __repr__(self):
        return f"repo: {self.repo} start_date: {self.start_date} end_date: {self.end_date}"


class MonthlyLastCommit:
    """Last commit recorded for a given year/month."""

    def __init__(self, year, month, last_commit):
        self.year, self.month, self.last_commit = year, month, last_commit

    def __repr__(self):
        return f"year: {self.year} month: {self.month} last_commit: {self.last_commit}"


def get_repo_start_end_date():
    """
    Collect graduated/retired projects and their dates from the Excel workbook.

    The workbook ``projects-info-from-podlings-xml-extra-metadata.xlsx`` is read
    from ``root_path`` and its active sheet is scanned starting at row 2.

    Returns
    -------
    list of Data
        One entry per row that has a repository URL and whose status is
        ``graduated`` or ``retired``. The repository name is the last path
        segment of the URL; dates are the raw cell values.
    """
    workbook_path = os.path.join(
        root_path, "projects-info-from-podlings-xml-extra-metadata.xlsx"
    )
    sheet = openpyxl.load_workbook(workbook_path).active

    all_repos = list()

    logging.info("Finding all repos and their start and end dates from EXCEL sheet")
    for row in range(2, sheet.max_row + 1):
        cells = sheet[row]
        repo_url = cells[repo_url_cell].value
        status = cells[status_cell].value
        # Rows with an empty URL or an empty status cell cannot be classified; skip them
        # instead of failing on ``None.strip()``.
        if repo_url is None or status is None:
            continue
        # we are only interested in retired/graduated projects
        if status.strip() not in ("graduated", "retired"):
            continue
        all_repos.append(
            Data(
                # [-1] keeps the whole value when the URL contains no "/".
                repo_url.rsplit("/", 1)[-1],
                cells[start_date_cell].value,
                cells[end_date_cell].value,
            )
        )

    return all_repos


def get_repo_metadata():
    """
    Collect graduated/retired projects with status and dates from the Excel workbook.

    The workbook ``projects-info-from-podlings-xml-extra-metadata.xlsx`` is read
    from ``root_path`` and its active sheet is scanned starting at row 2.

    Returns
    -------
    list of RepoMetadata
        One entry per row that has a repository URL and whose status is
        ``graduated`` or ``retired``. The repository name is the last path
        segment of the URL; the status is stored stripped of surrounding
        whitespace; dates are the raw cell values.
    """
    workbook_path = os.path.join(
        root_path, "projects-info-from-podlings-xml-extra-metadata.xlsx"
    )
    sheet = openpyxl.load_workbook(workbook_path).active

    all_repos = list()

    logging.info("Finding all repos and some metadata from EXCEL sheet")
    for row in range(2, sheet.max_row + 1):
        cells = sheet[row]
        repo_url = cells[repo_url_cell].value
        status = cells[status_cell].value
        # Rows with an empty URL or an empty status cell cannot be classified; skip them
        # instead of failing on ``None.strip()``.
        if repo_url is None or status is None:
            continue
        status = status.strip()
        # we are only interested in retired/graduated projects
        if status not in ("graduated", "retired"):
            continue
        all_repos.append(
            RepoMetadata(
                # [-1] keeps the whole value when the URL contains no "/".
                repo_url.rsplit("/", 1)[-1],
                status,
                cells[start_date_cell].value,
                cells[end_date_cell].value,
            )
        )

    return all_repos


def get_last_monthly_commits(repo_path, start_date, end_date):
    """
    Find the last commit of every month between ``start_date`` and ``end_date``.

    Parameters
    ----------
    repo_path : str
        Directory in which ``git log`` is executed
    start_date : str
        First day of the period, formatted ``%Y-%m-%d``
    end_date : str or None
        Last day of the period, formatted ``%Y-%m-%d``; ``None`` means today

    Returns
    -------
    list of MonthlyLastCommit
        One entry per month that has a commit before its last day, followed by
        one entry for the month of ``end_date`` (always appended, even when
        its hash is empty). Year and month are kept as strings.
    """
    if end_date is None:
        end_date = datetime.now().strftime("%Y-%m-%d")

    def last_commit_before(day):
        # Hash of the newest commit made up to 23:59:59 of ``day``; "" when git prints nothing.
        proc = subprocess.Popen(
            [
                "git",
                "log",
                "-1",
                '--before="{} 23:59:59"'.format(day),
                '--format="%H"',
            ],
            cwd=repo_path,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        stdout = proc.communicate()[0]
        # The format string carries literal quotes, so they come back in the output.
        return stdout.decode("utf-8").replace('"', "").strip()

    start_date_as_date = date(*(int(part) for part in start_date.split("-")[:3]))
    end_date_as_date = date(*(int(part) for part in end_date.split("-")[:3]))

    # Walk every day of the period, end date included, and keep each month once
    # (in order). Including the end date matters when it is the 1st of a month:
    # otherwise the month before it would be the last key and get sliced off below
    # without ever being queried.
    months_to_analyze = list(
        OrderedDict.fromkeys(
            (start_date_as_date + timedelta(days=offset)).strftime("%Y-%m-01")
            for offset in range((end_date_as_date - start_date_as_date).days + 1)
        )
    )

    months_commits = list()

    # Every month except the final one is measured up to its last calendar day.
    for month in months_to_analyze[:-1]:
        split = month.split("-")
        last_day = calendar.monthrange(int(split[0]), int(split[1]))[1]
        commit_hash = last_commit_before(
            "{}-{}-{}".format(split[0], split[1], last_day)
        )
        if commit_hash != "":  # sometimes the first month does not have any commits
            months_commits.append(MonthlyLastCommit(split[0], split[1], commit_hash))

    # The last month is handled separately because it ends on the end-date of the
    # project rather than on the last day of the month.
    end_split = end_date.split("-")
    months_commits.append(
        MonthlyLastCommit(end_split[0], end_split[1], last_commit_before(end_date))
    )

    return months_commits


def _incubation_months(start_date, end_date):
    """
    Returns the number of months the project was in incubation
    """
    if end_date is None:
        end_date = datetime.now().strftime("%Y-%m-%d")

    start_date_split = start_date.split("-")
    end_date_split = end_date.split("-")
    start_date_as_date = date(
        int(start_date_split[0]), int(start_date_split[1]), int(start_date_split[2])
    )
    end_date_as_date = date(
        int(end_date_split[0]), int(end_date_split[1]), int(end_date_split[2])
    )
    # months = OrderedDict(((date.fromisoformat(start_date) + timedelta(_)).strftime("%Y-%m-01"),0) for _ in range((date.fromisoformat(end_date) - date.fromisoformat(start_date)).days))
    months = OrderedDict(
        ((start_date_as_date + timedelta(_)).strftime("%Y-%m-01"), 0)
        for _ in range((end_date_as_date - start_date_as_date).days)
    )
    months_to_analyze = list(months)
    return months_to_analyze


def get_start_end_commit_hash(repo_path, start_date, end_date):
    """
    Return the first and last commit hash that ``git log`` lists for a date window.

    Parameters
    ----------
    repo_path : str
        Directory in which ``git log`` is executed
    start_date :
        Value passed to git as ``--since``
    end_date :
        Value passed to git as ``--until``; ``None`` is replaced by the current
        date and time

    Returns
    -------
    tuple or None
        ``(oldest_hash, newest_hash)`` taken from the last and the first output
        line, or ``None`` (after logging an error) when git printed nothing.
    """
    if end_date is None:
        end_date = datetime.now()

    # NOTE(review): git is understood to resolve date-only --since/--until values
    # against the current time of day -- verify the window edges are as intended.
    git_command = [
        "git",
        "log",
        f"--since={start_date}",
        f"--until={end_date}",
        '--format="%H"',
    ]
    process = subprocess.Popen(
        git_command,
        cwd=repo_path,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    raw_output, _ = process.communicate()
    listed = raw_output.decode("utf-8").splitlines()

    if not listed:
        logging.error("no commits {}. start date {}, end date {}".format(repo_path, start_date, end_date))
        return None

    # git log prints newest first: the last line opens the window, the first closes it.
    # The format string carries literal quotes, which are removed here.
    oldest, newest = listed[-1], listed[0]
    return (oldest.replace('"', ""), newest.replace('"', ""))


def add_commit_to_data(months_commits, start_date, end_date, sha_start, sha_end):
    """Append a CommitHashDate built from the given window to ``months_commits`` (mutated in place)."""
    entry = CommitHashDate(
        hash_start=sha_start,
        hash_end=sha_end,
        start_date=start_date,
        end_date=end_date,
    )
    months_commits.append(entry)


def monthly_start_end_commit_hashes(repo_path, start_date, end_date):
    """
    Collect the first and last commit hash of every month of a project period.

    Parameters
    ----------
    repo_path : str
        Directory of the Git repository
    start_date : str
        First day of the period, formatted ``%Y-%m-%d``
    end_date : str or None
        Last day of the period, formatted ``%Y-%m-%d``; ``None`` means today

    Returns
    -------
    list of CommitHashDate
        One entry for the first month (from ``start_date`` to the end of that
        month), one per month in between, and one for the last month (from its
        first day to ``end_date``). The first and last entries are always
        emitted, even when both dates fall in the same month. Hashes are
        ``None`` for a window without commits.
    """
    # Resolve the open end once so the string handling below never sees None.
    if end_date is None:
        end_date = datetime.now().strftime("%Y-%m-%d")

    months_to_analyze = _incubation_months(start_date, end_date)
    months_commits = list()

    def record(window_start, window_end):
        # Look up the hashes for one window and append the result.
        commits = get_start_end_commit_hash(repo_path, window_start, window_end)
        sha_start, sha_end = (None, None) if commits is None else commits
        add_commit_to_data(months_commits, window_start, window_end, sha_start, sha_end)

    # these are for the first month. the reasoning here is that we might have only a few days or so if the project started on i.e., 2006-03-28, so we're trying to be precise
    split = start_date.split("-")
    last_day = calendar.monthrange(int(split[0]), int(split[1]))[1]
    record(start_date, "{}-{}-{}".format(split[0], split[1], last_day))

    end_split = end_date.split("-")
    end_year_month = (int(end_split[0]), int(end_split[1]))

    # these are for the months in between. The end month is recognised by value
    # rather than by position, so a month is not lost when the month list does not
    # finish with the end month.
    for m in months_to_analyze[1:]:
        split = m.split("-")
        year = int(split[0])
        month = int(split[1])
        if (year, month) == end_year_month:
            continue  # handled below, bounded by end_date
        last_day = calendar.monthrange(year, month)[1]
        record(m, "{}-{}-{}".format(year, split[1], last_day))

    # this is for the last month, from the first day of the month and until the day of the end-date
    record("{}-{}-01".format(end_split[0], end_split[1]), end_date)

    return months_commits


def fetch_commits_for_month(month, repo_path):
    """
    Fetch commits from the repository for the window described by ``month``

    Python keeps only the last definition of a name, so this wrapper cannot share
    the name ``fetch_commits`` with the three-argument function defined after it;
    it has its own name so that it stays callable.

    Parameters
    ----------
    month :
        Object exposing ``start_date`` and ``end_date`` attributes
    repo_path: str
        The absolute path to the Git repository

    Returns
    -------
    Whatever ``fetch_commits(repo_path, start_date, end_date)`` returns
    """
    return fetch_commits(repo_path, month.start_date, month.end_date)


def fetch_commits(repo_path, start_date, end_date):
    """
    Fetch commits from the repository

    Parameters
    ----------
    repo_path: str
        The absolute path to the Git repository
    start_date :
        Lower bound of the window, passed to git as ``--since``
    end_date :
        Upper bound of the window, passed to git as ``--until``

    Returns
    -------
    list of str
        The command output split on blank lines. A failed command is logged and
        whatever was printed on stdout is still returned.
    """
    # git whatchanged is useful, but there's a trick; one needs to use the dates instead of hashes revA..revB because using hashes will not include the information from first commit, aka revA.
    # NOTE(review): recent git releases are understood to deprecate `whatchanged`
    # -- confirm against the git version in use.
    git_command = [
        "git",
        "whatchanged",
        "--no-merges",
        '--pretty=format:"%h%x09%an%x09%cn%x09%cd"',
        "--date=format:'%A,%Y-%m-%d'",
        "--since={}".format(start_date),
        "--until={}".format(end_date),
    ]
    process = subprocess.Popen(
        git_command,
        cwd=repo_path,
        stderr=subprocess.PIPE,
        stdout=subprocess.PIPE,
    )
    data, errors = process.communicate()
    if process.returncode != 0:
        # Without this, a failing command is indistinguishable from an empty window.
        logging.error(
            "git whatchanged failed in {} (exit code {}): {}".format(
                repo_path,
                process.returncode,
                errors.decode("utf-8", errors="replace").strip(),
            )
        )
    commits = data.decode("utf-8").split("\n\n")
    return commits


def checkout_main_branch(repo, repo_path):
    """
    Tries to do a git checkout on the master, main, or trunk branch

    The branches are tried in the order master, trunk, main. When none of them
    can be checked out, the branch on the second line of ``git branch -v`` is
    tried instead.

    Parameters
    ----------
    repo : str
        Repository name, used in log messages only
    repo_path : str
        Directory in which the git commands are executed
    """

    logging.info(
        "Resetting the repo {} to last commit on main/master/trunk branch".format(repo)
    )

    def run_git(*arguments):
        # Run one git command in the repository; return (exit code, raw stdout).
        proc = subprocess.Popen(
            ["git", *arguments],
            cwd=repo_path,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        stdout = proc.communicate()[0]
        return proc.returncode, stdout

    for candidate in ("master", "trunk", "main"):
        returncode, _ = run_git("checkout", candidate)
        if returncode == 0:
            logging.info(
                "Successfully switch to {} branch in {}".format(candidate, repo)
            )
            return

    _, listing = run_git("branch", "-v")
    lines = listing.decode("utf-8").splitlines()
    # No output means no branches, might be an empty repo e.g., incubator-Guacamole.
    # A single line leaves no second entry to fall back to.
    if len(lines) < 2:
        return

    # Every listing line starts with a two-character marker column ("* " for the
    # current entry, blanks otherwise); the branch name is the first word after it.
    # Splitting the raw line on single spaces would yield "" for unmarked lines.
    fields = lines[1][2:].split()
    branch = fields[0] if fields else ""
    if branch == "" or branch.startswith("("):
        # Nothing usable on that line (e.g. a "(HEAD detached ...)" entry).
        logging.error("Cannot reset to master/main/trunk branch")
        return

    returncode, _ = run_git("checkout", branch)
    if returncode == 0:
        logging.info("Successfully switch to {} branch in {}".format(branch, repo))
    else:
        logging.error("Cannot reset to master/main/trunk branch")


class DevStatistics:
    """Per-author counters: commits, files changed, insertions and deletions."""

    def __init__(self, name, commits, files_changed, insertions, deletions):
        self.name, self.commits = name, commits
        self.files_changed = files_changed
        self.insertions, self.deletions = insertions, deletions

    def __str__(self) -> str:
        # Comma-separated, in constructor order.
        return (
            f"{self.name},{self.commits},{self.files_changed},"
            f"{self.insertions},{self.deletions}"
        )

    # repr() renders the same text as str().
    __repr__ = __str__


def _minority_majority_authors(stats):
    """
    Aggregate ``git log --shortstat`` output per author.

    Parameters
    ----------
    stats : str
        Text made of ``author <name>`` lines, each optionally followed by a
        shortstat line such as
        `` 3 files changed, 10 insertions(+), 2 deletions(-)``

    Returns
    -------
    dict
        key: author name (lower-cased, quotes removed, cut at the first comma),
        value: DevStatistics accumulating commits, files changed, insertions
        and deletions of that author
    """
    dev_stats = dict()
    current = None  # statistics of the author whose commit is being read
    expecting_shortstat = False

    # The text is read line by line: only a line that begins with the marker starts
    # a commit, so an author name that merely contains "author" is not cut apart.
    for line in stats.splitlines():
        if line == "author" or line.startswith("author "):
            author = line[len("author"):].strip().replace("'", "").replace('"', "").lower()
            author = author.strip().split(",")[0].strip()
            current = dev_stats.get(author)
            if current is None:
                current = DevStatistics(author, 0, 0, 0, 0)
                dev_stats[author] = current
            # we have cases when a commit does not have any more information, except author!
            current.commits = current.commits + 1
            expecting_shortstat = True
            continue

        if not expecting_shortstat or line.strip() == "":
            continue

        # First non-blank line after the author: the shortstat summary. Each
        # comma-separated part is "<count> <label...>"; parts that do not look
        # like that are ignored instead of raising.
        expecting_shortstat = False
        for part in line.split(","):
            tokens = part.split()
            if len(tokens) < 2 or not tokens[0].isdecimal():
                continue
            amount, label = int(tokens[0]), tokens[1]
            if label.startswith("file"):
                current.files_changed = current.files_changed + amount
            elif label.startswith("insertion"):
                current.insertions = current.insertions + amount
            elif label.startswith("deletion"):
                current.deletions = current.deletions + amount

    return dev_stats


def minority_majority_authors_for_month(month, repo_path):
    """
    Extract per-author statistics for the window described by ``month``.

    Python keeps only the last definition of a name, so this wrapper cannot share
    the name ``minority_majority_authors`` with the three-argument function
    defined after it; it has its own name so that it stays callable.

    Parameters
    ----------
    month :
        Object exposing ``start_date`` and ``end_date`` attributes
    repo_path : str
        The path to the repository

    Returns
    -------
    Whatever ``minority_majority_authors(repo_path, start_date, end_date)`` returns
    """
    return minority_majority_authors(repo_path, month.start_date, month.end_date)


def minority_majority_authors(
    repo_path,
    start_date,
    end_date,
) -> dict:
    """
    Collect, per author, the number of commits, files changed, insertions and deletions

    Parameters
    ----------
    repo_path : str
        Directory in which ``git log`` is executed

    start_date: str
        Lower bound of the window, passed to git as ``--since``

    end_date: str
        Upper bound of the window, passed to git as ``--until``

    Returns
        dict: the result of ``_minority_majority_authors`` applied to the git output
    """
    logging.info("Getting minority/majority committers stats")

    # One "author <name>" line per non-merge commit, followed by its shortstat summary.
    git_command = [
        "git",
        "log",
        "--no-merges",
        "--shortstat",
        "--pretty=format:author %an",
        f"--since={start_date}",
        f"--until={end_date}",
    ]
    process = subprocess.Popen(
        git_command,
        cwd=repo_path,
        stderr=subprocess.PIPE,
        stdout=subprocess.PIPE,
    )
    raw_output, _ = process.communicate()
    return _minority_majority_authors(raw_output.decode("utf-8"))


def hard_reset_main_branch(repo, repo_path):
    """
    Tries to do a git reset --hard on the master, main, or trunk branch

    The branches are tried in the order master, trunk, main. When none of them
    works, the branch on the second line of ``git branch -v`` is tried instead.

    Parameters
    ----------
    repo : str
        Repository name, used in log messages only
    repo_path : str
        Directory in which the git commands are executed
    """

    logging.info(
        "Doing a hard reset for the repo {} to last commit on main/master/trunk branch".format(
            repo
        )
    )

    def run_git(*arguments):
        # Run one git command in the repository; return (exit code, raw stdout).
        proc = subprocess.Popen(
            ["git", *arguments],
            cwd=repo_path,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        stdout = proc.communicate()[0]
        return proc.returncode, stdout

    for candidate in ("master", "trunk", "main"):
        returncode, _ = run_git("reset", "--hard", candidate)
        if returncode == 0:
            logging.info(
                "Successfully switch to {} branch in {}".format(candidate, repo)
            )
            return

    _, listing = run_git("branch", "-v")
    lines = listing.decode("utf-8").splitlines()
    # No output means no branches, might be an empty repo e.g., incubator-Guacamole.
    # A single line leaves no second entry to fall back to.
    if len(lines) < 2:
        return

    # Every listing line starts with a two-character marker column ("* " for the
    # current entry, blanks otherwise); the branch name is the first word after it.
    # Splitting the raw line on single spaces would yield "" for unmarked lines.
    fields = lines[1][2:].split()
    branch = fields[0] if fields else ""
    if branch == "" or branch.startswith("("):
        # Nothing usable on that line (e.g. a "(HEAD detached ...)" entry).
        logging.error(
            "Cannot hard reset {} to master/main/trunk branch".format(repo)
        )
        return

    returncode, _ = run_git("reset", "--hard", branch)
    if returncode == 0:
        logging.info("Successfully switch to {} branch in {}".format(branch, repo))
    else:
        logging.error(
            "Cannot hard reset {} to master/main/trunk branch".format(repo)
        )
