import numpy as np
import pandas as pd
import csv
import re
import json

from ietfdata.datatracker import *
from ietfdata.mailarchive2 import *

from ietfdata.datatracker_ext import * #DataTrackerExt

import matplotlib.pyplot as plt

import datetime
from datetime import datetime
from datetime import date
import pytz
import seaborn as sns

import tldextract

import pickle

# Global matplotlib styling, applied once at import time.
# Font: serif family, with Helvetica as the only candidate in the serif list.
plt.rcParams['font.family'] = 'serif'
plt.rcParams['font.serif'] = ['Helvetica']
# Draw axis ticks and grid lines underneath the plotted artists.
plt.rcParams['axes.axisbelow'] = True
# PDF font type 42 (TrueType per the matplotlib rcParams docs).
plt.rcParams['pdf.fonttype'] = 42

# Per-category containers for e-mail IDs. They are only created here; nothing
# in the visible part of the script fills them.
# NOTE(review): presumably keyed by year, judging by the names - confirm.
year_original_datatracker_emailID = {}
year_nodatatracker_emailID = {}
year_mapped_datatracker_emailID = {}
year_automated_emailID = {}
year_rolebased_emailID = {}

# Lenient address pattern: a local part, then either "@" or an obfuscated
# "at"/"AT" (optionally wrapped in spaces and/or parentheses), then a domain
# ending in a 2-3 character suffix, followed by one non-word character.
# The first capture group is the whole address.
ren = r'(?:\.?)([\w\-_+#~!$&\'\.]+(?<!\.)(@|[ ]?\(?[ ]?(at|AT)[ ]?\)?[ ]?)(?<!\.)[\w]+[\w\-\.]*\.[a-zA-Z-]{2,3})(?:[^\w])'
# Plain "local@domain.suffix" pattern, tried when the lenient one finds nothing.
ren2 = r'([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+)'

# Handle on the mail archive that the scan below iterates over.
archive = MailArchive()

# address -> [display name, number of messages seen from that address]
email_all_dict = {}
# Names of mailing lists that report zero messages.
empty_lists = set()
# mailing list name -> [processed counter, failed counter]
error_maillists = {}

# Compile both sender patterns once instead of passing the pattern strings to
# re.findall for every single message.
_lenient_addr_re = re.compile(ren)
_plain_addr_re = re.compile(ren2)


def _tally(list_name, slot):
    """Increment one counter of ``error_maillists[list_name]``.

    ``slot`` 0 is the "processed" counter and ``slot`` 1 the "failed" counter.
    The ``[0, 0]`` entry is created on first use, so a list only appears in
    ``error_maillists`` once something has actually been counted for it.
    """
    counts = error_maillists.setdefault(list_name, [0, 0])
    counts[slot] += 1


# Walk every message of every mailing list and collect, per unique sender
# address, the first display name seen and how many messages used the address.
for mailing_list_name in archive.mailing_list_names():
    ml = archive.mailing_list(mailing_list_name)
    if not ml:
        continue

    if ml.num_messages() <= 0:
        empty_lists.add(mailing_list_name)
        continue

    for message in ml.messages():
        try:
            # Mask apostrophes while matching; they are put back into the
            # extracted address below.
            # NOTE(review): the mask is never undone in the display name, so a
            # name such as "O'Brien" is stored as "o__apostrophe__brien", and
            # an address that itself contains an apostrophe is not removed
            # from the name. Behaviour kept as-is - confirm whether intended.
            e = message.header_from().replace("'", "__apostrophe__")

            hits = _lenient_addr_re.findall(e)
            if hits:
                # Group 1 of the lenient pattern is the full address.
                email = hits[0][0]
            else:
                hits = _plain_addr_re.findall(e)
                if not hits:
                    # No address in this header. Previously `email` kept the
                    # value from the preceding message here, so that sender
                    # was silently counted again (or a NameError was raised on
                    # the very first message). Always count it as a failure.
                    _tally(mailing_list_name, 1)
                    continue
                email = hits[0]

            email = email.replace("__apostrophe__", "'")

            # Whatever is left of the header after removing the address is
            # treated as the display name.
            e = e.replace(email, '')

            email = email.lower()
            if '@' not in email:
                # Obfuscated "user at example.org" form matched by the lenient
                # pattern.
                email = email.replace(' at ', '@').lower()

            if email in email_all_dict:
                # Known sender: keep the first display name, bump the count.
                # NOTE(review): repeat senders skip the "processed" counter
                # below, exactly as before - confirm whether intended.
                email_all_dict[email][1] += 1
                continue

            # Peel angle brackets, quotes and surrounding whitespace off the
            # display name. The order of the strip calls matters.
            e = e.strip('>')
            e = e.strip('<')
            e = e.strip()
            e = e.strip('"')
            e = e.strip()
            e = e.lower()

            if email != '':
                email_all_dict[email] = [e, 1]

            _tally(mailing_list_name, 0)
        except Exception:
            # Best effort: a message whose header cannot be processed is only
            # counted. `except Exception` (rather than a bare `except`) keeps
            # Ctrl-C / SystemExit able to stop this long-running scan.
            _tally(mailing_list_name, 1)

# Write one tab-separated row per unique sender: <address> <TAB> <display name>.
# newline='' hands line-ending control to the csv module, as its documentation
# requires; without it every row ends in "\r\r\n" on Windows.
with open('data/overall_email_unique.csv', 'w', encoding='utf8', newline='') as wfile:
    # quotechar=None with no explicit quoting mode means fields are never
    # quoted; special characters are backslash-escaped instead.
    writer = csv.writer(wfile, delimiter='\t', escapechar='\\', quotechar=None)
    for k, entry in email_all_dict.items():
        nm = ""
        if len(entry) > 0:
            nm = entry[0].lower()
            # str methods return a new string. The original discarded the
            # result, so embedded newlines and tabs were never removed.
            nm = nm.replace('\n', '').replace('\t', ' ').strip()
        writer.writerow([k.lower(), nm])