import pandas as pd
import re
from datetime import date
from ietfdata.rfcindex import *
import ietfdata.mailarchive2 as ma
from rfcerrata import RfcErrata
import json

# Data sources used by the rest of the script. What each constructor loads
# (and from where) is defined in ietfdata / rfcerrata, not in this file.
rfc_index = RFCIndex()
c_errata = RfcErrata()
archive = ma.MailArchive()

# Primary pattern for pulling an address out of a From header. The separator
# may be a literal "@" or an obfuscated "at"/"AT", optionally surrounded by
# spaces and parentheses. The final domain label must be 2-3 characters and be
# followed by one non-word character. The pattern has three capture groups, so
# re.findall() yields tuples; element 0 of each tuple is the whole address.
ren = r'(?:\.?)([\w\-_+#~!$&\'\.]+(?<!\.)(@|[ ]?\(?[ ]?(at|AT)[ ]?\)?[ ]?)(?<!\.)[\w]+[\w\-\.]*\.[a-zA-Z-]{2,3})(?:[^\w])'
# Simpler fallback pattern: plain "local@domain.label" with a literal "@".
# It has a single capture group, so re.findall() yields plain strings.
ren2 = r'([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+)'

def _load_json(path):
    """Return the parsed contents of the JSON file at *path*.

    The file is opened inside a ``with`` block so its handle is closed as
    soon as parsing finishes, rather than whenever the garbage collector
    gets around to the anonymous file object.
    """
    with open(path) as fh:
        return json.load(fh)


# Precomputed lookup tables.
# Lower-cased person name -> person ID.
name_pid_dict = _load_json('data/name_pid_dict.json')
# Email ID -> person ID (converted with int() where it is used).
emailID_pid_dict = _load_json('data/emailID_pid_dict.json')
# Not referenced in the visible part of this script.
emailID_first_email = _load_json('data/emailID_first_email.json')
# Not referenced in the visible part of this script.
emailID_yearly_monthly_vol_dict = _load_json('data/emailID_yearly_monthly_vol_dict.json')
# Mailing-list name -> metadata mapping; only its 'area' entry is read here.
maillist_area_type_status = _load_json('data/maillist_area_type_status.json')
# str(person ID) -> collection of that person's email IDs.
pid_emailID_dict = _load_json('data/pid_emailID_dict.json')

# Tally errata submitters by lower-cased name:
#   unique_names     - every submitter, with the number of errata filed
#   names_present    - submitters that have an entry in name_pid_dict
#   names_notpresent - submitters without one
#   names_RFC        - document id -> set of known submitter names
unique_names = {}
names_present = {}
names_notpresent = {}
names_RFC = {}
for erratum in c_errata.errata:
    submitter = erratum.submitter_name.lower()
    unique_names[submitter] = unique_names.get(submitter, 0) + 1

    if submitter not in name_pid_dict:
        names_notpresent[submitter] = names_notpresent.get(submitter, 0) + 1
        continue

    names_present[submitter] = names_present.get(submitter, 0) + 1
    names_RFC.setdefault(erratum.doc_id, set()).add(submitter)
        
# Per-RFC metadata for every RFC that has at least one erratum filed by a
# known submitter other than the RFC Editor:
#   rfc_date - document id -> (month number, year) of publication
#   rfc_area - document id -> area, as reported by the RFC index
#   rfc_wg   - document id -> working group, as reported by the RFC index
rfc_date = dict()
rfc_area = dict()
rfc_wg = dict()
month_dict = {"January":1,"February":2,"March":3,"April":4,"May":5,"June":6,"July":7,"August":8,"September":9,"October":10,"November":11,"December":12}
for err in c_errata.errata:
    nm = err.submitter_name.lower()

    if nm not in name_pid_dict or nm == "rfc editor":
        continue

    # Many errata share one RFC; the index entry is the same every time, so
    # look each document up only once.
    if err.doc_id in rfc_date:
        continue

    rfc_t = rfc_index.rfc(err.doc_id)
    if rfc_t is None:
        # The index has no entry for this document id, so there is no
        # publication date to anchor the analysis on; leave the RFC out
        # rather than crash on the attribute access below.
        continue

    rfc_date[err.doc_id] = (month_dict[rfc_t.month], rfc_t.year)
    rfc_area[err.doc_id] = rfc_t.area
    rfc_wg[err.doc_id] = rfc_t.wg

# Person-to-person interaction graph. The file is tab-separated; its own
# header row (row 0) is consumed and replaced by the labels below.
_graph_columns = [
    "A_PersonID_From",
    "B_PersonID_To",
    "Type",
    "Time_since_1st_mail_A",
    "Time_since_1st_mail_B",
    "Max_Time_A",
    "Max_Time_B",
    "Interaction_timestamp",
    "MessageID_A",
    "MessageID_B",
    "Mailing_list",
    "Mailinglist_type",
]
df_mappings = pd.read_csv(
    'data/personID_to_personID_graph_new_with_mlisttype.csv',
    delimiter='\t',
    header=0,
    names=_graph_columns,
)

# Email IDs listed as role-based, one per line (surrounding whitespace removed).
with open('data/role_based_emailIDs_newData.txt') as role_file:
    role_based_emailIDs = {entry.strip() for entry in role_file}

# Email IDs listed as automated senders, one per line.
with open('data/automated_email_IDs_newData.txt') as automated_file:
    automated_list = {entry.strip() for entry in automated_file}

# Person IDs (as ints) that own at least one role-based email ID.
pid_rolebased = {
    int(person_id)
    for email_id, person_id in emailID_pid_dict.items()
    if email_id in role_based_emailIDs
}

# Not read anywhere in the visible code (only mentioned in commented-out
# progress output further down).
c = 0

# Message counters, each shaped {document id: {person ID: count}}.
user_inarea = {}
user_outarea = {}
user_inwg = {}
user_outwg = {}

# Start every (RFC, submitter) pair at zero in all four tables so that people
# who never posted still appear in the results.
for rfc_id in rfc_date:
    for submitter in names_RFC[rfc_id]:
        person = name_pid_dict[submitter]
        for tally in (user_inarea, user_outarea, user_inwg, user_outwg):
            tally.setdefault(rfc_id, {})[person] = 0
        
def _extract_sender_email(header_from):
    """Return the lower-cased sender address in a From header, or None.

    The primary pattern ``ren`` is tried first and ``ren2`` is the fallback.
    None is returned when the header is missing or neither pattern matches,
    so callers never see an address left over from an earlier message.
    """
    if header_from is None:
        return None

    # Apostrophes are swapped for a word-character placeholder while matching
    # (ren2's character classes do not accept "'") and restored afterwards.
    text = str(header_from).replace("'", "__apostrophe__")
    match = re.search(ren, text)
    if match is None:
        match = re.search(ren2, text)
    if match is None:
        return None

    # Group 1 is the whole address in both patterns.
    address = match.group(1).replace("__apostrophe__", "'").lower()
    if '@' not in address:
        # Obfuscated "user at example.org" form matched by ren.
        address = address.replace(' at ', '@')
    return address


# A message counts towards an RFC when it was sent on or before the first day
# of the RFC's publication month and at most this many days earlier (3 * 365).
_LOOKBACK_DAYS = 1095

# Index built once: sender address -> [(document id, cutoff date, area, wg,
# person ID), ...]. This replaces a scan over every RFC for every message.
# One entry is added per (RFC, submitter name), so two names that resolve to
# the same person are still counted separately, as before. A person with no
# entry in pid_emailID_dict simply contributes no addresses.
sender_index = {}
for rfc_ in rfc_date:
    pub_month, pub_year = rfc_date[rfc_]
    cutoff = date(pub_year, pub_month, 1)
    for nm in names_RFC[rfc_]:
        pid = name_pid_dict[nm]
        entry = (rfc_, cutoff, rfc_area[rfc_], rfc_wg[rfc_], pid)
        for address in set(pid_emailID_dict.get(str(pid), ())):
            sender_index.setdefault(address, []).append(entry)

# Walk every message of every mailing list and bump the in/out-of-area and
# in/out-of-working-group counters (created earlier in the script) for each
# (RFC, submitter) whose address sent the message inside the look-back window.
for mailing_list_name in archive.mailing_list_names():

    print("\rMailingList %s, %d" % (mailing_list_name, len(user_inarea)), end = '')

    ml = archive.mailing_list(mailing_list_name)
    if not ml or ml.num_messages() <= 0:
        continue

    # Same for every message of the list, so looked up once. A list missing
    # from the metadata table still raises KeyError, as it always did.
    m_area = maillist_area_type_status[mailing_list_name]['area']

    for message in ml.messages():
        try:
            email = _extract_sender_email(message.header_from())
            sent_on = message.header_date().date()
        except Exception as exc:
            # Best effort: a message whose headers cannot be read is reported
            # and skipped; it must not abort the whole scan.
            print(exc,"ERR")
            continue

        if email is None:
            # No recognisable sender address: nothing to attribute.
            continue

        for rfc_, cutoff, area, wg, pid in sender_index.get(email, ()):
            if sent_on > cutoff or (cutoff - sent_on).days > _LOOKBACK_DAYS:
                continue

            area_counts = user_inarea if m_area == area else user_outarea
            wg_counts = user_inwg if mailing_list_name == wg else user_outwg
            for counts in (area_counts, wg_counts):
                per_rfc = counts.setdefault(rfc_, {})
                per_rfc[pid] = per_rfc.get(pid, 0) + 1

def _period_end_year(year):
    """Return the end year of the five-year period containing *year*.

    The periods are 2003-2007, 2008-2012, 2013-2017 and 2018-2022, labelled
    2007, 2012, 2017 and 2022. None is returned for any other year.
    """
    for end_year in (2007, 2012, 2017, 2022):
        if end_year - 5 < year <= end_year:
            return end_year
    return None


def _emails_by_period(counts_by_rfc):
    """Group per-person message counts by the RFC's publication period.

    *counts_by_rfc* maps document id -> {person ID: count}. The result maps a
    period end year -> list of counts, one per (RFC, person) pair. RFCs
    published outside the known periods are left out; previously they either
    crashed on an unbound ``yr`` or were filed under the period of whichever
    RFC happened to be processed just before them.
    """
    grouped = {}
    for rfc_ in rfc_date:
        if rfc_ not in counts_by_rfc:
            continue
        yr = _period_end_year(rfc_date[rfc_][1])
        if yr is None:
            continue
        for count in counts_by_rfc[rfc_].values():
            grouped.setdefault(yr, []).append(count)
    return grouped


# Messages sent to the RFC's own working-group list / to any other list.
inwg_emails = _emails_by_period(user_inwg)
outwg_emails = _emails_by_period(user_outwg)

# output: one line per year key, holding the key followed by all of its
# counts, comma-separated.
for csv_path, series in (
    ("data/email-discussion-inwg-emails.csv", inwg_emails),
    ("data/email-discussion-outwg-emails.csv", outwg_emails),
):
    with open(csv_path, "w") as sink:
        for year, counts in series.items():
            sink.write(f"{year},{','.join(map(str, counts))}\n")
    