import os
import pickle as pk
import json
from numpy import mean
import logging
from tqdm import tqdm
import pandas as pd
from utils import *
from Brothers import Brother
import pdb
from datetime import datetime

# Script-wide logger: named 'delta', emits INFO and above through a single
# StreamHandler (which writes to stderr by default).
logger = logging.getLogger('delta')
logger.setLevel(logging.INFO)
streamhandler = logging.StreamHandler()
logger.addHandler(streamhandler)



# Input / output locations used by the script (relative to the working dir).
INACTIVE_LIST_PATH = '../inactive_task/inactive_pkg_repo_list.json'
RELEASES_INFO_PATH = './releases_info.pkl'
DEPS_FPATHS = [
    '../ghd_dataset/dataset/pypi.cache/dependencies.csv',
    '../ghd_dataset/dataset/pypi.cache/dependencies_sv1.csv',
]
METADATA_PATH = '../inactive_task/metadata'
INTERMEDIATES_PATH = './reverse_deps'
DEPRECATION_INFO_PATH = "../inactive_task/deprecation_info.csv"
LABELED_INFO_PATH = "../inactive_task/labeled_info.csv"
DEPRECATED_BROTHERS_PATH = "../inactive_task/deprecated_brothers.pkl"


def build_repo_package_maps(inactive_list):
    """Build both lookup directions from an iterable of ``(package, repo)`` pairs.

    Returns:
        A ``(repo2pkg, pkg2repo)`` tuple of dicts. When a key occurs more than
        once, the last pair wins.
    """
    repo2pkg = {}
    pkg2repo = {}
    for pkg, repo in inactive_list:
        repo2pkg[repo] = pkg
        pkg2repo[pkg] = repo
    return repo2pkg, pkg2repo


def latest_name_version(package_name, releases_info):
    """Return ``(package_name, releases_info[package_name]["latest"])``."""
    return package_name, releases_info[package_name]["latest"]


def count_dependent_states(son_dict):
    """Count how many values of ``son_dict`` are in each known state.

    Returns:
        A ``(latest, non_latest, mixed)`` tuple counting the values equal to
        ``"latest"``, ``"non-latest"`` and ``"mixed"``. Any other value is
        ignored.
    """
    counts = {"latest": 0, "non-latest": 0, "mixed": 0}
    for state in son_dict.values():
        if state in counts:
            counts[state] += 1
    return counts["latest"], counts["non-latest"], counts["mixed"]


def select_labeled_rows(df):
    """Return the rows of ``df`` whose ``deprecation`` cell is not ``" "``.

    A single boolean mask replaces growing a frame with ``pd.concat`` once per
    row, and the result keeps the column header even when no row matches.
    """
    return df[df["deprecation"] != " "]


def count_delta_signs(labeled_df):
    """Count negative and positive ``delta_of_downstream`` values.

    Only rows whose ``deprecation`` equals 0 are considered.

    Returns:
        A ``(negative, positive)`` tuple of ints; zero deltas are in neither.
    """
    deltas = labeled_df.loc[labeled_df["deprecation"] == 0, "delta_of_downstream"]
    return int((deltas < 0).sum()), int((deltas > 0).sum())


def add_release_amounts(df, repo2pkg, releases_info):
    """Insert a ``release_amount`` column at position 6 of ``df`` (in place).

    Each value is ``len(releases_info[package]["releases"])`` for the package
    mapped from the row's ``repo_name``.
    """
    # Values are collected in row order and inserted as a whole column, so the
    # result does not depend on the frame's index labels matching positions.
    amounts = [
        len(releases_info[repo2pkg[repo]]["releases"])
        for repo in tqdm(df["repo_name"])
    ]
    df.insert(6, "release_amount", amounts)


def add_downstream_counts(df, repo2pkg, releases_info, bro):
    """Insert the four downstream columns at positions 7-10 of ``df`` (in place).

    For every row, the package's latest release is passed to
    ``bro.get_direct_dependents`` and the returned mapping's values are
    tallied by state.

    Returns:
        The number of rows whose package had no direct dependents.
    """
    deltas, latest_counts, non_latest_counts, mixed_counts = [], [], [], []
    no_dependents = 0
    for repo in tqdm(df["repo_name"]):
        name_version = latest_name_version(repo2pkg[repo], releases_info)
        # Second positional argument is presumably ``latest_only`` -- TODO confirm
        # against Brother.get_direct_dependents.
        dependents = bro.get_direct_dependents(name_version, False)
        if not dependents:
            no_dependents += 1
        latest_cnt, non_latest_cnt, mixed_cnt = count_dependent_states(dependents)
        deltas.append(latest_cnt - non_latest_cnt)
        latest_counts.append(latest_cnt)
        non_latest_counts.append(non_latest_cnt)
        mixed_counts.append(mixed_cnt)

    df.insert(7, "delta_of_downstream", deltas)
    df.insert(8, "latest_downstream", latest_counts)
    df.insert(9, "non-latest_downstream", non_latest_counts)
    df.insert(10, "mixed_downstream", mixed_counts)
    return no_dependents


def collect_deprecated_brothers(labeled_df, repo2pkg, releases_info, bro):
    """Map each package with ``deprecation == 1`` to ``bro.get_brothers(...)``.

    ``get_brothers`` is called with the package's latest ``(name, version)``
    and two positional ``True`` flags (meaning not visible here -- see
    Brother.get_brothers).
    """
    brother_dict = {}
    deprecated_repos = labeled_df.loc[labeled_df["deprecation"] == 1, "repo_name"]
    for repo in tqdm(deprecated_repos):
        package_name = repo2pkg[repo]
        latest_name_ver = latest_name_version(package_name, releases_info)
        brother_dict[package_name] = bro.get_brothers(latest_name_ver, True, True)
    return brother_dict


def main():
    """Enrich the deprecation CSV and dump brothers of deprecated packages.

    Steps:
      1. Load the inactive (package, repo) list and the pickled release info.
      2. Build the ``Reverser`` / ``Brother`` helpers.
      3. Add ``release_amount`` and the downstream-count columns to the
         deprecation CSV when they are missing, and rewrite the CSV.
      4. Write the labeled subset, reload it, and pickle the brothers of every
         package labeled as deprecated.

    Set the environment variable ``DELTA_DEBUG=1`` to drop into ``pdb`` after
    the helpers are built and again at the end of the run; by default the
    script runs unattended.
    """
    debug = os.environ.get("DELTA_DEBUG") == "1"

    with open(INACTIVE_LIST_PATH, 'r') as f:
        inactive_list = json.load(f)

    # NOTE: unpickling can execute arbitrary code; only load a trusted file.
    with open(RELEASES_INFO_PATH, 'rb') as f:
        releases_info = pk.load(f)

    # mapping repo to package or package to repo
    inactive_repo2pkg, inactive_pkg2repo = build_repo_package_maps(inactive_list)

    rev = Reverser(
        metadata_path=METADATA_PATH,
        intermediates_path=INTERMEDIATES_PATH,
        use_intermediates=True,
    )
    bro = Brother(
        deps_fpaths=DEPS_FPATHS,
        reversed_deps=rev.reversed_deps,
        releases_info=releases_info,
    )
    if debug:
        pdb.set_trace()

    df = pd.read_csv(DEPRECATION_INFO_PATH)

    if "release_amount" not in df.columns:
        logger.info("Add release infomation to csv")
        add_release_amounts(df, inactive_repo2pkg, releases_info)
        df.to_csv(DEPRECATION_INFO_PATH, index=False)

    if "delta_of_downstream" not in df.columns:
        logger.info("Add downstream information to csv")
        no_dependents = add_downstream_counts(
            df, inactive_repo2pkg, releases_info, bro
        )
        logger.info("no_dependents: %s", no_dependents)
        select_labeled_rows(df).to_csv(LABELED_INFO_PATH, index=False)
        df.to_csv(DEPRECATION_INFO_PATH, index=False)

    # Re-read the labeled subset from disk so its columns are parsed afresh
    # without the unlabeled (" ") rows.
    pure_df = pd.read_csv(LABELED_INFO_PATH)
    negative_delta, positive_delta = count_delta_signs(pure_df)
    logger.info(
        "deprecation == 0: negative delta: %s, positive delta: %s",
        negative_delta,
        positive_delta,
    )

    brother_dict = collect_deprecated_brothers(
        pure_df, inactive_repo2pkg, releases_info, bro
    )
    with open(DEPRECATED_BROTHERS_PATH, "wb") as f:
        pk.dump(brother_dict, f)

    if debug:
        pdb.set_trace()


if __name__ == "__main__":
    main()
