#!/opt/local/bin/python3.9

#%%
"""Author statistics and co-author network from the RP bibliography.
  Before running, convert the bibliography to plain ASCII with
  "iconv -f utf-8 -t ascii//TRANSLIT rp.bib > rp2"
  in order to get rid of malformed characters.
"""
from numpy import*
#from string import maketrans 
from pybtex.database.input import bibtex
import codecs
import igraph as ig
#from progress.bar import IncrementalBar
from progress.bar import ChargingBar
from networkx.algorithms import community
import datetime


# Number of top-ranked authors listed in the printed summary table.
topN = 10

# Range of publication years taken into account: entries older than minYear
# are skipped, and papers marked "in press" are counted under maxYear.
minYear = 1987
currentYear = datetime.date.today().year
maxYear = currentYear





#%% open a bibtex file
# Parse the bibliography with pybtex; `bibdata.entries` is what the loops
# further down iterate over.
# NOTE(review): the module docstring suggests writing an ASCII copy named
# "rp2", but this reads "../rp.bib" -- confirm which file is intended.
#parser = bibtex.Parser(encoding='ISO-8859-1')
parser = bibtex.Parser()
bibdata = parser.parse_file("../rp.bib")

# LaTeX fragment -> plain-text replacement table used with replace_all().
#
# Every backslash is written explicitly as "\\" so that no key relies on an
# invalid escape sequence such as "\k" or "\l" (these emit a warning on
# current Python versions).  Keys, values and insertion order are otherwise
# unchanged; the order matters because the replacements are applied one after
# another in this order.
#
# NOTE(review): because of that ordering some later, longer keys can never
# match -- e.g. "\\l " comes after "\\l", and "\\c " after "\\c" -- so the
# shorter key has already removed the command.  Confirm whether the longer
# keys were meant to take precedence.
dic = {
    # ogonek on c / a, including the surrounding braces
    "{\\k c}": "c",
    "{\\k a}": "a",
    # grouping braces
    "{": "",
    "}": "",
    # backslash-r (ring accent command) and a literal carriage return
    "\\r": "",
    "\r": "",
    # accent / letter commands
    "\\l": "",
    "\\H": "",
    "\\c": "",
    "\\o": "o",
    "\\k": "",
    "\\O": "O",
    "\\ae": "",
    "\\c ": "",
    "\\u ": "",
    "\\.": "",
    "\\. ": "",
    "\\^": "",
    "\\v ": "",
    "\\v": "",
    "\\` ": "",
    "\\' ": "",
    "\\`": "",
    "\\'": "",
    "\\\"": "",
    "\\~": "",
    # accented vowels and special letters
    "\\\"a": "a",
    "\\\"u": "u",
    "\\\"o": "o",
    "\\ss": "ss",
    "\\'a": "a",
    "\\'o": "o",
    "\\'e": "e",
    "\\'i": "i",
    "\\i": "i",
    "\\l ": "l",
    # a doubled backslash
    "\\\\": "",
}

def replace_all(text, dic):
    """Return `text` with every key of `dic` replaced by its value.

    The substitutions are applied one after another in the iteration order
    of `dic`, so a later replacement sees the result of the earlier ones.
    """
    for pattern in dic:
        substitute = dic[pattern]
        text = text.replace(pattern, substitute)
    return text


# --- per-paper / per-year bookkeeping ---------------------------------------
authorperpaper = {}    # bibtex key -> number of authors of that paper
paperperyear = {}      # publication year -> number of papers
ignore = {}            # bibtex key -> 1 if the entry is skipped, else 0

# --- per-author bookkeeping --------------------------------------------------
authorlist = []        # author names in order of first appearance
authordic = {}         # author name -> running index (row/column in the matrix)
labellist = {}         # author name -> list of bibtex keys of their papers
numberworks = {}       # author name -> number of papers (any author position)
numberworks1 = {} # only count first author papers
authorstartyear = {}   # author name -> year of earliest paper
authorendyear = {}     # author name -> year of latest paper

# --- per-venue bookkeeping ---------------------------------------------------
journallist = []       # journal / booktitle names in order of first appearance
journalnumber = {}     # journal / booktitle name -> number of papers

# --- per-author-pair bookkeeping ---------------------------------------------
startyear = {}         # (index, index) -> year of first joint paper
endyear = {}           # (index, index) -> year of latest joint paper

# --- running counters --------------------------------------------------------
i = 0                  # next free author index
cnt = 0                # number of entries that were actually processed

npaper = len(bibdata.entries)


#%% loop through the individual references
# First pass over the bibliography: decide which entries are ignored and
# collect the per-paper, per-year, per-author and per-venue statistics.
bar = ChargingBar('read bibtex file', max=npaper)
for bib_id in bibdata.entries:
    bar.next()

    entry = bibdata.entries[bib_id]
    fields = entry.fields
    ignore[bib_id] = 0

    # ignore papers annotated as "Related" or "Software"
    if fields.get("annote") in ("Related", "Software"):
        ignore[bib_id] = 1
        continue

    # handle papers in press: they are counted under maxYear
    year = fields.get('year', '')
    if year == "in press":
        # continue # if papers in press should be ignored
        fields['year'] = maxYear
        year = maxYear

    # Convert the year once; a missing or non-numeric year is still a
    # ValueError, but now names the offending entry.
    try:
        year = int(year)
    except ValueError as err:
        raise ValueError(
            f"bibtex entry {bib_id!r} has no usable year: {year!r}"
        ) from err

    if year < minYear:
        ignore[bib_id] = 1
        continue

    authorperpaper[bib_id] = 0

    # Always key by the year as a string: "in press" papers were stored above
    # as the integer maxYear and would otherwise get a bucket separate from
    # the papers published in that same year.
    year_key = str(year)
    paperperyear[year_key] = paperperyear.get(year_key, 0) + 1

    for position, person in enumerate(entry.persons["author"]):
        author = replace_all(str(person), dic)
        authorperpaper[bib_id] += 1

        labellist.setdefault(author, []).append(str(bib_id))

        # authordic holds exactly the names in authorlist, so the dict gives
        # the same membership answer without scanning the list.
        if author not in authordic:
            authorlist.append(author)
            authordic[author] = i
            numberworks[author] = 0
            numberworks1[author] = 0
            authorstartyear[author] = maxYear
            authorendyear[author] = 0
            i += 1

        numberworks[author] += 1
        if position == 0:
            # first author of this paper
            numberworks1[author] += 1

        # widen the author's active period to include this paper
        if year < authorstartyear[author]:
            authorstartyear[author] = year
        if year > authorendyear[author]:
            authorendyear[author] = year

    # Journals and book titles share one venue counter.
    for venue_field in ("journal", "booktitle"):
        if venue_field not in fields:
            continue
        journal = str(replace_all(fields[venue_field], dic))
        if journal not in journalnumber:
            journallist.append(journal)
            journalnumber[journal] = 0
        journalnumber[journal] += 1

    cnt += 1

bar.finish()

# Number of distinct authors; this is the side length of the square matrices.
nauthors = len(authordic)

# mat is a plain integer matrix; mat2 has the same shape but a structured
# dtype with a single integer field named "CoWorks".
dt = [('CoWorks', int)]
matrix_shape = (nauthors, nauthors)
mat = zeros(matrix_shape, dtype=int)
mat2 = zeros(matrix_shape, dtype=dt)

# Summary line (bold/bright via ANSI escape codes).
summary = ">> found \033[1m\033[97m%i\033[0m bibtex entries, \033[1m\033[97m%i\033[0m authors" % (cnt, nauthors)
print(summary)




#%% create network matrix
# Second pass over the bibliography: for every paper that was not ignored,
# increment mat[i1, i2] for each ordered pair of its authors (self-pairs
# included) and record the first / last year in which each pair appears.
bar = ChargingBar('create network matrix', max=npaper)
for bib_id in bibdata.entries:
    bar.next()

    # Entries flagged while reading the file (annotated "Related"/"Software",
    # or older than minYear) are skipped here as well.
    if ignore[bib_id]:
        continue

    entry = bibdata.entries[bib_id]

    # year of this paper; papers in press count as maxYear
    raw_year = entry.fields["year"]
    if raw_year == 'in press':
        currentpaper_year = maxYear
    else:
        currentpaper_year = int(raw_year)

    # Resolve every author to a matrix index once per paper instead of once
    # per author pair.  The indices were handed out consecutively while
    # reading the file, so they always lie inside mat.
    indices = [authordic[replace_all(str(person), dic)]
               for person in entry.persons["author"]]

    # deal with multiple authors
    for i1 in indices:
        for i2 in indices:
            mat[i1, i2] += 1

            # add start and end year of co-authorship
            if (i1, i2) not in endyear or currentpaper_year > endyear[i1, i2]:
                endyear[i1, i2] = currentpaper_year

            if (i1, i2) not in startyear or currentpaper_year < startyear[i1, i2]:
                startyear[i1, i2] = currentpaper_year


bar.finish()


N = len(mat)


## Remove self-connections
# Clear the diagonal of mat, then copy it row by row into the structured
# array mat2.
fill_diagonal(mat, 0)
for row in range(N):
    mat2[row, :] = mat[row, :]



#%% create network with NetworkX or igraph
# Lookup tables keyed by the author's matrix index instead of the name.
res = {index: name for name, index in authordic.items()}  # index -> author name
numW = {index: numberworks[authorlist[index]] for index in authordic.values()}
startY = {index: authorstartyear[authorlist[index]] for index in authordic.values()}
endY = {index: authorendyear[authorlist[index]] for index in authordic.values()}
timeinterval = {
    index: (authorstartyear[authorlist[index]], authorendyear[authorlist[index]])
    for index in authordic.values()
}
numW1 = {index: numberworks1[authorlist[index]] for index in authordic.values()}


# create network with igraph
# NOTE(review): whether Graph.Adjacency honours `edge_attrs` (and whether a
# matrix entry k yields k parallel edges rather than one edge carrying the
# count) depends on the installed igraph version -- confirm that "CoWorks"
# really ends up on the edges.
G = ig.Graph.Adjacency(mat.tolist(), mode="undirected", edge_attrs=["CoWorks"])

# Add nodes and set attributes with igraph
bar = ChargingBar('Node attributes', max=len(numW))
for node, vertex in enumerate(G.vs):
    bar.next()
    vertex['numberWorks'] = numW[node]
    vertex['Label'] = res[node]
    vertex['start'] = startY[node]
    vertex['end'] = endY[node]
    vertex['firstauthor'] = numW1[node]
bar.finish()

# Add edges and set attributes with igraph
bar = ChargingBar('Edge attributes', max=len(startyear))
for (i, j), start_year in startyear.items():
    bar.next()
    # error=False makes get_eid return -1 for a missing edge (e.g. the
    # (i, i) pairs, whose self-connections were removed) instead of raising.
    # The bare "except:" this replaces also swallowed KeyboardInterrupt and
    # any genuine error.
    edge = G.get_eid(i, j, directed=False, error=False)
    if edge != -1:  # Check if the edge exists in the graph
        G.es[edge]['start'] = start_year
        G.es[edge]['end'] = endyear[(i, j)]
bar.finish()

# Calculate network measures
# Degree and log10(betweenness + 1) are computed with igraph and stored in
# dicts keyed by vertex index.
bar = ChargingBar('calculate network measures', max=2)

degree_centrality = G.degree() # using igraph
d = dict(zip((vertex.index for vertex in G.vs), degree_centrality))
bar.next()

betweenness_centrality = G.betweenness() # using igraph
betweenness_centrality_log = list(
    map(lambda value: log10(value + 1), betweenness_centrality)
)
bb = dict(zip((vertex.index for vertex in G.vs), betweenness_centrality_log))
bar.next()

# Currently disabled: closeness (G.closeness()) and writing 'degree',
# 'betweenness' and 'closeness_vitality' back onto the vertices.
bar.finish()







#%% save authors' number of contributions (number of 1st author papers and total number of co-authored papers)
# Authors sorted by number of first-author papers (ties broken by name), both
# descending.  Authors with the same number of first-author papers share a
# rank.
topauthors = sorted(numberworks1.items(), key=lambda kv: (kv[1], kv[0]), reverse=True)
rank = 0
oldNum = 0
cnt = 0
topN = 10

# NOTE(review): the header line is comma-separated while the data rows use
# "; " -- confirm which delimiter the consumers of this file expect.
# The context manager closes the file even if writing fails half-way.
with open('../Data/authorspublications.txt', 'w') as f:
    f.write('Rank, Author, Number of works 1st author, Number of all works\n')

    for name, first_author_works in topauthors:
        if oldNum != first_author_works:
            rank += 1
        all_works = numberworks[name]
        f.write(f'{rank}; {name}; {first_author_works}; {all_works}\n')
        # Echo the head of the table, plus every author with at least 50
        # works, as LaTeX table rows.
        # NOTE(review): "cnt <= topN+1" prints topN+2 rows -- confirm intended.
        if cnt <= topN+1 or all_works >= 50:
            print(f'{rank}\t&{name}\t&{first_author_works}\t&{all_works}\\\\')
        oldNum = first_author_works
        cnt += 1





#%% time spent in RP research
# duration[author] = number of years spanned by the author's papers
# (first and last year inclusive), also written to authorstime.txt.
duration = {}
with open('../Data/authorstime.txt', 'w') as authorfile:
    for au in authorlist:
        duration[au] = authorendyear[au] - authorstartyear[au] + 1
        print('%s; %d' %(au, duration[au]), file=authorfile)

topauthors = sorted(duration.items(), key=lambda kv: (kv[1], kv[0]), reverse=True)

# Print LaTeX table rows, one per distinct duration, listing all authors that
# share it.  A group is printed when the next (shorter) duration starts, and
# printing stops once that next duration is below 20 years.
numW = 0        # duration of the group currently being collected
authlist = ''   # "; "-joined names of the authors in that group
cnt = 0         # number of groups printed so far
for name, years in topauthors:
    if years == numW:
        authlist = authlist + '; ' + name
        continue
    if numW > 0:
        print(f'{cnt+1}\t&{authlist}\t&{numW}\\\\')
        cnt += 1
        if years < 20:
            break
    numW = years
    authlist = name
else:
    # The list ran out without reaching the cut-off: the group collected
    # last has not been printed yet, so flush it instead of dropping it.
    if numW > 0:
        print(f'{cnt+1}\t&{authlist}\t&{numW}\\\\')
        cnt += 1


# NOTE(review): the histogram result is not stored; presumably this line is
# only meant to echo the result when the cell is run interactively.
histogram(list(duration.values()))


#%%



# save authorperpaper
# One line per paper: bibtex key, a tab, and the number of authors.  The
# context manager closes the file even if writing fails half-way.
with open('../Data/authorperpaper.txt', 'w') as authorperpaperfile:
    for k, author_count in authorperpaper.items():
        print ('%s\t%d' %(k, author_count), file=authorperpaperfile)










#%% save network
# Write the co-author graph G, including the vertex and edge attributes set
# above, to a GraphML file (the "z" variant writes it compressed).
print ("save network as graphml (for loading with Gephy 0.9.2)")

G.write_graphmlz("../Data/coauthor_network.graphml.gz") # using igraph


