#%%
"""Script to create affiliation network from the RP bibliography.
"""
import sys
import numpy
import re
from pybtex.database.input import bibtex
import pybtex.errors
import json
import datetime
import time
from progress.bar import ChargingBar
import igraph as ig
import pycountry
from geopy.geocoders import Nominatim


# get coordinates for countries

# Spellings from the affiliation data that are replaced by the name under
# which the country is looked up (and cached).
# NOTE(review): the key 'Bangladesh,' (with trailing comma) is kept exactly as
# in the original code; presumably it mirrors the raw data -- confirm.
_COUNTRY_ALIASES = {
    'Bangladesh,': "People's Republic of Bangladesh",
    'Georgia': "Republic of Georgia",
    'Palestinian Territories': "the State of Palestine",
    'Jordan': "Hashemite Kingdom of Jordan",
    'Lebanon': "Lebanese Republic",
    'Turkey': "Türkiye",
    'Greece': "Hellenic Republic",
    'Mauritius': "Republic of Mauritius",
    'Russia': "Russian Federation",
}


def getCoord(country):
    """Return the (latitude, longitude) of a country.

    Coordinates are taken from the module-level cache ``country_coordinates``
    when available. Otherwise the country is geocoded with Nominatim, the
    result is added to the cache, and the whole cache is written to
    ``coordsFile`` as JSON.

    Parameters
    ----------
    country : str
        Country name as used in the affiliation data. The placeholder string
        'None' (or the value None) denotes an unknown country.

    Returns
    -------
    tuple
        ``(latitude, longitude)``; ``(nan, nan)`` for an unknown country or
        when the geocoder finds no match.
    """
    country = _COUNTRY_ALIASES.get(country, country)

    # Unknown country: nothing to look up, and no geocoder is needed.
    if country is None or country == 'None':
        return numpy.nan, numpy.nan

    if country in country_coordinates:
        latitude, longitude = country_coordinates[country]
        return latitude, longitude

    # Local import: geopy is already a dependency of this file (Nominatim).
    from geopy.exc import GeopyError

    geolocator = Nominatim(user_agent="getCoordinates")
    try:
        location = geolocator.geocode(country)
    except GeopyError:
        # Retry once, e.g. after a timeout; a second failure propagates.
        location = geolocator.geocode(country)

    # geocode() yields None when nothing matches; do not cache such a miss.
    if location is None:
        print(f"No coordinates found for {country}")
        return numpy.nan, numpy.nan

    latitude = location.latitude
    longitude = location.longitude

    # Store the coordinate so later runs do not query the service again.
    country_coordinates[country] = [latitude, longitude]
    with open(coordsFile, 'w') as f:
        json.dump(country_coordinates, f, indent=4)

    return latitude, longitude


#%% some pre-settings
# JSON file with the author affiliations, keyed by BibTeX label.
affiliationsFile = '../Data/affiliations.json'
# JSON cache of country coordinates; read at start-up and rewritten by getCoord.
coordsFile = '../Data/country_coordinates.json'

# Empty placeholders; `affiliations` is replaced when the data are imported.
affiliations = dict()
countries = dict()


#%% import data

# import rp.bib
print("Read rp.bib")
# strict mode is switched on before parsing
# (presumably so that bibliography errors abort the run -- confirm)
pybtex.errors.set_strict_mode(True)
parser = bibtex.Parser()
bibdata = parser.parse_file("../rp.bib")
# sorted BibTeX labels give a reproducible processing order
labels = sorted(bibdata.entries.keys())

# import affiliations (JSON is UTF-8; do not rely on the platform default
# encoding; the `with` block already closes the file)
with open(affiliationsFile, 'r', encoding='utf-8') as f:
    affiliations = json.load(f)

# import country coordinates; start with an empty cache if none exists yet
try:
    with open(coordsFile, 'r', encoding='utf-8') as f:
        country_coordinates = json.load(f)
except FileNotFoundError:
    country_coordinates = {}





#%% create network on country level
# Nodes are countries. For every considered paper, each ordered pair of
# distinct countries among its author affiliations adds 1 to the matrix.
nodeList = {}           # node index -> country
nodeID = {}             # country -> node index
countryNumPaper = {}    # country -> number of papers listing that country
cnt = 0                 # next free node index
cntNoAff = 0            # considered papers without affiliation data
cntAff = 0              # considered papers (not "Software" / "Related")
countriesPerPaper = []  # one duplicate-free country list per paper

for bib_id in labels:
    # .get() avoids a KeyError for papers missing from the affiliations file
    paperAffiliations = affiliations.get(bib_id)
    if paperAffiliations is None:
        print(f"{bib_id} not in affiliations")

    # ignore papers "software" and "related"
    entry = bibdata.entries[bib_id]
    if entry.fields["annote"] in ('Software', 'Related'):
        continue

    cntAff += 1
    if not paperAffiliations:
        # a paper missing from the file has already been reported above
        if paperAffiliations is not None:
            print(f"No affiliation available for {bib_id}")
        cntNoAff += 1
        continue

    paperCountries = []
    for record in paperAffiliations:
        for aff in record['author_affiliations']:
            country = aff['author_country']
            paperCountries.append(country)
            # add country to node list if not yet included
            if country not in nodeID:
                nodeID[country] = cnt
                nodeList[cnt] = country
                cnt += 1

    # every country counts once per paper (order-preserving de-duplication)
    paperCountries = list(dict.fromkeys(paperCountries))
    for country in paperCountries:
        countryNumPaper[country] = countryNumPaper.get(country, 0) + 1
    countriesPerPaper.append(paperCountries)

print(f"{cntNoAff} (from {cntAff}) affiliations not available")

# create network matrix, sized by the number of nodes actually found
# (not by the number of papers)
mat = numpy.zeros((len(nodeID), len(nodeID)), int)
for paperCountries in countriesPerPaper:
    for countries1 in paperCountries:
        for countries2 in paperCountries:
            if countries1 != countries2:
                mat[nodeID[countries1], nodeID[countries2]] += 1

# create network object
G = ig.Graph.Adjacency(mat.tolist(), mode="undirected")
# one (latitude, longitude) row per node
coords = numpy.zeros((len(nodeList), 2))

# assign attributes to nodes
bar = ChargingBar('Node attributes', max=len(nodeList), suffix='%(percent).0f%% - %(eta)ds')
for node, vertex in enumerate(G.vs):
    bar.next()
    country = nodeList[node]
    coord = getCoord(country)
    coords[node] = [coord[0], coord[1]]
    vertex['country'] = country
    vertex['latitude'] = coord[0]
    vertex['longitude'] = coord[1]
    vertex['nPaper'] = countryNumPaper[country]
bar.finish()

G.write_graphmlz("../Data/affiliationsCountry.graphml.gz") # using igraph


# prepare a map with the countries
import geopandas as gpd
import matplotlib.pyplot as plt

plt.clf()
# NOTE(review): `gpd.datasets` is no longer shipped with geopandas >= 1.0, so
# this lookup presumably needs an older geopandas -- confirm the pinned version.
worldMapFile = gpd.datasets.get_path('naturalearth_lowres')
world = gpd.read_file(worldMapFile)
world.plot()

# `coords` rows are (latitude, longitude); points_from_xy takes x (longitude)
# first and y (latitude) second.
longitudes = [c[1] for c in coords]
latitudes = [c[0] for c in coords]
points = gpd.GeoDataFrame(geometry=gpd.points_from_xy(longitudes, latitudes))

# Draw the country markers on the current axes
points.plot(ax=plt.gca(), color='red', markersize=5)

# Label each marker with its country; the placeholder 'None' gets no label
for i, row in points.iterrows():
    if nodeList[i] != 'None':
        marker = row['geometry']
        plt.annotate(nodeList[i], xy=(marker.x, marker.y), xytext=(3, 3),
                     textcoords="offset points", fontsize=8)





#%% create network on institution level
# Nodes are institutions. For every considered paper, each ordered pair of
# distinct institutions among its author affiliations adds 1 to the matrix.
nodeList = {}            # node index -> institution name
nodeID = {}              # institution name -> node index
countPaper = {}          # institution name -> number of papers listing it
cnt = 0                  # next free node index
institutesPerPaper = []  # one duplicate-free institution list per paper

for bib_id in labels:
    # .get() avoids a KeyError for papers missing from the affiliations file
    paperAffiliations = affiliations.get(bib_id)
    if paperAffiliations is None:
        print(f"{bib_id} not in affiliations")

    # ignore papers "software" and "related"
    entry = bibdata.entries[bib_id]
    if entry.fields["annote"] in ('Software', 'Related'):
        continue

    # nothing to add for papers without affiliation data
    if not paperAffiliations:
        continue

    paperInstituts = []
    for record in paperAffiliations:
        for aff in record['author_affiliations']:
            institute = aff['affiliation_name']
            paperInstituts.append(institute)
            # add institution to node list if not yet included
            if institute not in nodeID:
                nodeID[institute] = cnt
                nodeList[cnt] = institute
                cnt += 1

    # every institution counts once per paper (order-preserving de-duplication)
    paperInstituts = list(dict.fromkeys(paperInstituts))
    for institute in paperInstituts:
        countPaper[institute] = countPaper.get(institute, 0) + 1
    institutesPerPaper.append(paperInstituts)

# create network matrix, sized by the number of institutions found; there can
# be more institutions than papers, so len(labels) is not a safe bound
mat = numpy.zeros((len(nodeID), len(nodeID)), int)
for paperInstituts in institutesPerPaper:
    for institute1 in paperInstituts:
        for institute2 in paperInstituts:
            if institute1 != institute2:
                mat[nodeID[institute1], nodeID[institute2]] += 1

# create network object
G = ig.Graph.Adjacency(mat.tolist(), mode="undirected")

# assign attributes to nodes
bar = ChargingBar('Node attributes', max=len(nodeList), suffix='%(percent).0f%% - %(eta)ds')
for node, vertex in enumerate(G.vs):
    bar.next()
    institute = nodeList[node]
    vertex['institute'] = institute
    vertex['papers'] = countPaper[institute]
bar.finish()

G.write_graphmlz("../Data/affiliationsInstitute.graphml.gz") # using igraph

