import pickle as pk
import pandas as pd
import json
import pdb
from tqdm import tqdm
from copy import deepcopy

# Input files for the inactive-package task. The paths are relative, so they
# resolve against the current working directory.
file_path = "../inactive_task/deprecation_info.csv"
pure_path = "../inactive_task/labeled_info.csv"
brothers_path = "../inactive_task/deprecated_brothers.pkl"
inactive_list_path = "../inactive_task/inactive_pkg_repo_list.json"

# The JSON payload is consumed below as an iterable of (package, repo) pairs.
with open(inactive_list_path, "r") as f:
    inactive_list = json.load(f)

# Two-way lookup tables: repo name -> package name and package name -> repo
# name. When a key occurs more than once, the last pair in the list wins.
inactive_repo2pkg = {repo: pkg for pkg, repo in inactive_list}
inactive_pkg2repo = {pkg: repo for pkg, repo in inactive_list}

# `df` is queried by its "repo_name" and "stars" columns; `pure_df` is iterated
# by the script entry point, which reads "deprecation" and "repo_name".
df = pd.read_csv(file_path)
pure_df = pd.read_csv(pure_path)

# NOTE: unpickling executes arbitrary code embedded in the file, so this must
# only ever be pointed at a trusted, locally produced pickle.
# `bro_dic` is indexed by package name; each value must expose `.keys()`.
with open(brothers_path, "rb") as f:
    bro_dic = pk.load(f)

def find_similar_brothers(package_name, brothers):
    """Rank the brothers of a package by how close their star count is.

    Args:
        package_name: Package whose brothers are ranked. Must be a key of the
            module-level ``inactive_pkg2repo`` mapping.
        brothers: Iterable of candidate brother package names. Names missing
            from ``inactive_pkg2repo`` are skipped, as is the package itself.

    Returns:
        A list of brother *repo* names ordered from the smallest to the
        largest absolute difference in stars relative to ``package_name``'s
        repo. Ties keep the order in which the brothers were supplied.

    Raises:
        KeyError: If ``package_name`` is not in ``inactive_pkg2repo``.
        IndexError: If the package's repo, or a considered brother's repo,
            has no row in the module-level ``df``.
    """
    repo_name = inactive_pkg2repo[package_name]
    package_stars = df.loc[df["repo_name"] == repo_name, "stars"].iloc[0]

    star_gap = dict()
    for bro in brothers:
        if bro not in inactive_pkg2repo:
            continue
        bro_repo_name = inactive_pkg2repo[bro]
        # A package is not its own brother. Compare like with like (package
        # name with package name, repo name with repo name); comparing a repo
        # name against a package name would almost never match.
        if bro == package_name or bro_repo_name == repo_name:
            continue
        bro_stars = df.loc[df["repo_name"] == bro_repo_name, "stars"].iloc[0]
        star_gap[bro_repo_name] = abs(bro_stars - package_stars)

    # Order by the star gap itself (most similar first), not by repo name.
    return sorted(star_gap, key=star_gap.get)
        

if __name__ == "__main__":
    # Build two artefacts from the labelled data:
    #   * deprecated_bros.csv: one row per deprecated repo plus the rows of up
    #     to `brother_num` of its brothers, each repo written at most once;
    #   * deprecated_full_bros.pkl: deprecated repo name -> full brother list
    #     as returned by find_similar_brothers.
    brother_num = 5
    deprecated_and_brothers = set()  # repo names already written to the CSV
    deprecated_brother = dict()
    # Collect the pieces and concatenate once at the end; concatenating inside
    # the loop re-copies the whole accumulated frame on every iteration.
    collected_frames = []

    for _, row in tqdm(pure_df.iterrows(), total=len(pure_df)):
        if row["deprecation"] != 1:
            continue
        repo_name = row["repo_name"]
        package_name = inactive_repo2pkg[repo_name]
        if repo_name not in deprecated_and_brothers:
            deprecated_and_brothers.add(repo_name)
            collected_frames.append(row.to_frame().T)
        brother_list = find_similar_brothers(
            package_name=package_name,
            brothers=list(bro_dic[package_name].keys()),
        )
        deprecated_brother[repo_name] = list(brother_list)
        # Slicing already clamps to the list length, so no min() is needed.
        for bro in brother_list[:brother_num]:
            if bro not in deprecated_and_brothers:
                deprecated_and_brothers.add(bro)
                collected_frames.append(df.loc[df["repo_name"] == bro])

    # pd.concat raises ValueError on an empty list, so fall back to an empty
    # frame when no deprecated rows were found.
    if collected_frames:
        deprecated_bros_df = pd.concat(collected_frames, ignore_index=True)
    else:
        deprecated_bros_df = pd.DataFrame()

    deprecated_bros_df.to_csv("../inactive_task/deprecated_bros.csv", index=False)
    with open("../inactive_task/deprecated_full_bros.pkl", "wb") as f:
        pk.dump(deprecated_brother, f)