import sys
import logging
sys.path.append(".")
from astropy.io import ascii
from simple import REFERENCE_TABLES
from astrodb_utils import load_astrodb
from astrodb_utils.sources import logger
from astrodb_utils.publications import (
    find_publication,
    ingest_publication
)

# Raise both the logger imported from astrodb_utils.sources and the
# package-wide "astrodb_utils" logger to DEBUG
# (available levels: INFO/DEBUG/WARNING/ERROR/CRITICAL).
astrodb_utils_logger = logging.getLogger("astrodb_utils")
for _log in (logger, astrodb_utils_logger):
    _log.setLevel(logging.DEBUG)

SAVE_DB = True  # when True, the data files are saved in addition to modifying the .db file
RECREATE_DB = True  # when True, the .db file is recreated from the data files
SCHEMA_PATH = "simple/schema.yaml"
db = load_astrodb(
    "SIMPLE.sqlite",
    felis_schema=SCHEMA_PATH,
    reference_tables=REFERENCE_TABLES,
    recreatedb=RECREATE_DB,
)

# Input files: `link` is the calamari data table, `link_2` the table of its references.
link = "scripts/ingests/calamari/calamari_data.csv"
link_2 = "scripts/ingests/calamari/calamari_refs.csv"

# Data table: header on the first line, data from the second line onwards.
calamari_table = ascii.read(
    link,
    format="csv",
    delimiter=",",
    header_start=0,
    data_start=1,
    guess=False,
    fast_reader=False,
)

# Reference table.
# NOTE(review): data_start=0 together with header_start=0 differs from the
# data table above (data_start=1). Presumably this keeps the header line as
# row 0 so that 1-based reference numbers can index ref_table directly --
# confirm against the CSV before changing it.
ref_table = ascii.read(
    link_2,
    format="csv",
    delimiter=",",
    header_start=0,
    data_start=0,
    guess=False,
    fast_reader=False,
)

# Running tallies for the summary logged at the end of the script. They start
# at hand-picked offsets rather than zero, per the author's notes below.
ref_already_exists = 2  # Roth24 and Schl03 already exist in database
ref_ingested = -1  # GaiaEDR3 exists in database. Do not ingest as Gaia21

#helper method to retrieve the publication links from calamari_data
def getRef(ref_index):
    """Return the publication link for the first reference number in *ref_index*.

    Parameters
    ----------
    ref_index : str
        One or more reference numbers separated by commas (e.g. ``"3, 7"``).
        Only the first number is used; it indexes a row of the module-level
        ``ref_table``.

    Returns
    -------
    The value of that row's ``ADS`` column when it contains ``iopscience`` or
    ``harvard.edu``; otherwise the value of the row's ``Link`` column.

    Raises
    ------
    ValueError
        If the first comma-separated token is not an integer.
    """
    row = ref_table[int(ref_index.split(',')[0])]
    ref_link = row['ADS']
    # Fall back to the Link column only when the ADS column holds neither kind
    # of usable URL. The test must be "and": with "or" it is true for any link
    # that is not both an IOP and an ADS URL at once, so the ADS column would
    # practically never be returned.
    if 'iopscience' not in ref_link and 'harvard.edu' not in ref_link:
        ref_link = row['Link']
    return ref_link

#helper method to retrieve the bibcode from a link
def extractADS(link):
    """Extract the bibcode from an ADS URL of the form ``.../abs/<bibcode>/abstract``.

    Parameters
    ----------
    link : str
        URL containing an ``abs/`` path segment followed by the bibcode.
        The trailing ``/abstract`` segment is optional.

    Returns
    -------
    str
        The text after the first ``abs/`` up to ``/abstract`` (or up to the end
        of the link when ``/abstract`` is absent), with the URL escape ``%26``
        turned back into ``&``.

    Notes
    -----
    Links without an ``abs/`` segment are not supported and yield a
    meaningless slice; callers are expected to pass ADS abstract links only.
    """
    start = link.find('abs/') + 4
    end = link.find('/abstract')
    # str.find returns -1 when '/abstract' is missing; slicing with -1 would
    # silently drop the last character of the bibcode, so take the rest of
    # the link in that case.
    ads = link[start:] if end == -1 else link[start:end]
    return ads.replace("%26", "&")

#helper method to retrieve the doi from a link
def extractDOI(link):
    """Extract a DOI from an IOPscience article URL or a doi.org URL.

    Parameters
    ----------
    link
        The URL; it is converted with ``str()`` before inspection.

    Returns
    -------
    str
        For links containing ``iopscience``: everything after ``article/``
        with any ``/pdf`` removed. For all other links: everything after
        ``doi.org/``.
    """
    url = str(link)

    if 'iopscience' not in url:
        marker = 'doi.org/'
        return url[url.find(marker) + len(marker):]

    marker = 'article/'
    tail = url[url.find(marker) + len(marker):]
    return tail.replace("/pdf", "")

def otherReferencesList(ref):
    """Look up every reference number in *ref* and collect the database matches.

    Parameters
    ----------
    ref : str
        Comma-separated reference numbers (``"3, 7"`` and ``"3,7"`` are both
        accepted). Each number indexes a row of the module-level ``ref_table``.
        Blank tokens, such as the one left by a trailing comma, are skipped.

    Returns
    -------
    list
        For each reference that ``find_publication`` reports as found, the
        second element of its result (presumably the matched publication
        name -- verify against astrodb_utils). References that are not found
        are reported with a printed warning and left out.

    Raises
    ------
    ValueError
        If a non-blank token is not an integer.
    """
    result = []
    # Split on the bare comma: int() ignores surrounding whitespace, so this
    # handles both "3,7" and "3, 7" and matches how getRef splits its input.
    for token in ref.split(","):
        if not token.strip():
            continue
        entry = ref_table[int(token)]

        link = entry['ADS']
        # Use the Link column only when the ADS column holds neither an ADS
        # nor an IOP URL. This must be "and": with "or" the test is true for
        # nearly every link and the ADS column would always be discarded.
        if 'iopscience' not in link and 'harvard.edu' not in link:
            link = entry['Link']

        # Choose which find_publication keyword to search by.
        if 'harvard.edu' in link:
            # bibcode is embedded directly in the link
            kind, value = "bibcode", extractADS(link)
        elif 'iopscience' in link or 'doi.org' in link:
            # DOI is embedded directly in the link
            kind, value = "doi", extractDOI(link)
        else:
            # No usable link: build a short name from the Ref column by
            # dropping "+" and joining the first four and last two characters.
            short_name = entry['Ref'].replace("+", "")
            kind, value = "reference", short_name[0:4] + short_name[-2:]

        pub_result = find_publication(db=db, **{kind: value})
        if pub_result[0]:
            result.append(pub_result[1])
        else:
            print(f"Warning: Publication not found for {kind} {value}")

    return result


def _ingest_if_missing(**identifier):
    """Ingest a publication unless find_publication already reports a match.

    ``identifier`` is one keyword argument (``bibcode=...`` or ``doi=...``)
    passed unchanged to both ``find_publication`` and ``ingest_publication``.

    Returns True when ``ingest_publication`` was called and False when the
    publication was reported as already present.
    """
    if find_publication(db=db, **identifier)[0]:
        return False
    ingest_publication(db=db, **identifier)
    return True


# Ingest publication Curr20 by DOI before walking the reference table.
ingest_publication(
    db=db,
    doi="10.3847/2041-8213/abc631",
)
ref_ingested += 1

# Walk the reference table and make sure each publication is in the database.
# Rows whose columns match none of the patterns below are skipped uncounted.
# NOTE(review): the three checks are independent `if`s, so a row whose ADS
# column matches and whose Link column is a doi.org URL is looked up (and
# counted) more than once -- confirm that is intended before changing to elif.
for row in ref_table:
    ads_link = row['ADS']
    alt_link = row['Link']

    # ADS column carries the bibcode directly
    if 'harvard.edu' in ads_link:
        bib = extractADS(ads_link)
        print(bib)
        if _ingest_if_missing(bibcode=bib):
            ref_ingested += 1
        else:
            ref_already_exists += 1

    # ADS column is an IOPscience article URL carrying the DOI
    if 'iopscience' in ads_link:
        if _ingest_if_missing(doi=extractDOI(ads_link)):
            ref_ingested += 1
        else:
            ref_already_exists += 1

    # Link column is a doi.org URL
    if 'doi.org' in alt_link:
        if _ingest_if_missing(doi=extractDOI(alt_link)):
            ref_ingested += 1
        else:
            ref_already_exists += 1


logger.info(f"references ingested:{ref_ingested}")  # 9 references ingested
logger.info(f"references already exists:{ref_already_exists}")  # 25 references due to preexisting data
logger.info(f"total references:{ref_ingested+ref_already_exists}")  # 34 references total
# WRITE THE JSON FILES
if SAVE_DB:
    db.save_database(directory="data/")