import sys
import logging
import sqlalchemy
sys.path.append(".")
from astropy.io import ascii
from simple import REFERENCE_TABLES
from astrodb_utils import load_astrodb
from astrodb_utils.sources import (
    find_source_in_db,
    ingest_source,
    logger,
    AstroDBError,
    ingest_name
)

from astrodb_utils.publications import (
    find_publication,
)

# Run both the logger imported from astrodb_utils.sources and the
# "astrodb_utils" package logger at DEBUG (INFO/WARNING/ERROR/CRITICAL are the
# other options) so that every ingest decision is reported.
astrodb_utils_logger = logging.getLogger("astrodb_utils")
for _log in (logger, astrodb_utils_logger):
    _log.setLevel(logging.DEBUG)

SAVE_DB = True  # save the data files in addition to modifying the .db file
RECREATE_DB = True  # recreates the .db file from the data files
SCHEMA_PATH = "simple/schema.yaml"
db = load_astrodb(
    "SIMPLE.sqlite",
    recreatedb=RECREATE_DB,
    reference_tables=REFERENCE_TABLES,
    felis_schema=SCHEMA_PATH,
)

# Input files: the source catalogue and the table of references it cites.
link = "scripts/ingests/calamari/calamari_data.csv"
link_2 = "scripts/ingests/calamari/calamari_refs.csv"

# Reader options common to both CSV files.
_CSV_READ_OPTIONS = {
    "format": "csv",
    "header_start": 0,
    "guess": False,
    "fast_reader": False,
    "delimiter": ",",
}

calamari_table = ascii.read(link, data_start=1, **_CSV_READ_OPTIONS)

# NOTE(review): data_start=0 combined with header_start=0 appears to make the
# header line show up again as data row 0 -- presumably so that the reference
# ids used in calamari_data.csv index ref_table directly. TODO confirm.
ref_table = ascii.read(link_2, data_start=0, **_CSV_READ_OPTIONS)

# Running totals reported at the end of the script.
sources_ingested = 0
# Starts at -9: 3 sources are ingested explicitly at the start and 6 resolved
# children are ingested separately; per the original author's note these must
# not be counted as already existing in the database.
sources_already_exists = -9
# Starts at -6: offsets the names of the resolved children (original note:
# "remove resolved children names from unresolved parent JSON file").
names_ingested = -6

def getRef(ref_index):
    """Return the publication link for the first reference id in *ref_index*.

    Parameters
    ----------
    ref_index : str
        Comma-separated reference ids as stored in calamari_data; only the
        first id is used. The id is converted with ``int`` and used as a row
        index into the module-level ``ref_table``.

    Returns
    -------
    The row's 'ADS' value when it contains 'iopscience' or 'harvard.edu',
    otherwise the row's 'Link' value.
    """
    ref = ref_index.split(',')[0]
    entry = ref_table[int(ref)]
    ref_link = entry['ADS']
    # Fall back to the 'Link' column only when the 'ADS' value is neither an
    # IOP nor an ADS URL. (The previous `or` was true for every link that did
    # not contain both substrings, so the 'ADS' value was practically never
    # used.)
    if 'iopscience' not in ref_link and 'harvard.edu' not in ref_link:
        ref_link = entry['Link']
    return ref_link

def extractADS(link):
    """Extract the bibcode from an ADS abstract link.

    The bibcode is the text between the first 'abs/' and the following
    '/abstract'; a URL-encoded ampersand ('%26') is decoded to '&'.

    Parameters
    ----------
    link : str
        Link such as '.../abs/<bibcode>/abstract'. Anything else is converted
        with ``str`` first.

    Returns
    -------
    str
        The bibcode. When '/abstract' is absent the text runs to the end of
        the link; when 'abs/' is absent the text starts at the beginning of
        the link (so a bare bibcode is returned unchanged).
    """
    link = str(link)
    marker = link.find('abs/')
    # str.find returns -1 when the marker is missing; do not slice from a
    # position derived from -1.
    start = marker + 4 if marker != -1 else 0
    end = link.find('/abstract', start)
    if end == -1:
        # No trailing '/abstract': keep everything up to the end instead of
        # silently dropping the last character via link[start:-1].
        end = len(link)
    return link[start:end].replace("%26", "&")

def extractDOI(link):
    """Extract the DOI from an IOPscience or doi.org link.

    Parameters
    ----------
    link : str
        Link containing the DOI. Anything else is converted with ``str``.

    Returns
    -------
    str
        For links containing 'iopscience': the text after the first
        'article/' with every '/pdf' removed. For all other links: the text
        after the first 'doi.org/'. When the expected marker is absent the
        link itself is returned (so a bare DOI is returned unchanged).
    """
    link = str(link)
    from_iop = 'iopscience' in link
    marker = 'article/' if from_iop else 'doi.org/'
    # partition reports whether the marker was found, which avoids slicing
    # from a bogus offset when str.find would have returned -1.
    _, found, tail = link.partition(marker)
    doi = tail if found else link
    if from_iop:
        doi = doi.replace("/pdf", "")
    return doi

def otherReferencesList(ref):
    """Resolve the reference ids of a catalogue row to database publications.

    Each id is converted with ``int`` and used as a row index into the
    module-level ``ref_table``. The publication is then looked up with
    ``find_publication`` by, in order of preference:

    * bibcode, when the chosen link contains 'harvard.edu';
    * DOI, when the chosen link contains 'iopscience' or 'doi.org';
    * a short reference name built from the row's 'Ref' value otherwise.

    Parameters
    ----------
    ref : str
        Comma-separated reference ids, e.g. "3, 17". Whitespace around the
        ids and empty items are ignored.

    Returns
    -------
    list
        The second element of each successful ``find_publication`` result, in
        the order of the ids. Ids whose publication is not found are skipped
        after a warning is printed.
    """
    result = []
    # Split on ',' and strip, so "1, 2" and "1,2" are both accepted.
    for ref_id in str(ref).split(","):
        ref_id = ref_id.strip()
        if not ref_id:
            continue
        entry = ref_table[int(ref_id)]

        link = entry['ADS']
        # If neither a bibcode nor a DOI link is in the 'ADS' column, use the
        # 'Link' column. (The previous `or` was true for every link that did
        # not contain both substrings, so 'ADS' was practically never used.)
        if 'iopscience' not in link and 'harvard.edu' not in link:
            link = entry['Link']

        # `kind` is both the find_publication keyword and the word used in
        # the warning message.
        if 'harvard.edu' in link:
            kind, value = "bibcode", extractADS(link)
        elif 'iopscience' in link or 'doi.org' in link:
            kind, value = "doi", extractDOI(link)
        else:
            # Build a short name from the 'Ref' text: drop '+', then take the
            # first four and the last two characters.
            reference = entry['Ref'].replace("+", "")
            kind, value = "reference", reference[0:4] + reference[-2:]

        pub_result = find_publication(db=db, **{kind: value})
        if pub_result[0]:
            result.append(pub_result[1])
        else:
            print(f"Warning: Publication not found for {kind} {value}")
    return result

def ingest_resolved_children(
    db,
    source,
    reference: str,
    *,
    ra: float = None,
    dec: float = None,
    epoch: str = None,
    equinox: str = None,
    other_reference: str = None,
    comment: str = None,
    raise_error: bool = True,
    ra_col_name: str = "ra",
    dec_col_name: str = "dec",
    epoch_col_name: str = "epoch",
):
    """Insert one row into the Sources table and register the source's name.

    Unlike a lookup-then-ingest flow, this function performs no search of the
    database before inserting.

    Parameters
    ----------
    db
        Database object; ``db.engine`` and ``db.Sources`` are used.
    source
        Source name; also ingested as its own entry in the Names table.
    reference : str
        Value stored in the "reference" column.
    ra, dec, epoch, equinox, other_reference, comment
        Values stored in the corresponding columns ("other_references" and
        "comments" for the last two).
    raise_error : bool
        Forwarded to ``ingest_name`` only. An ``IntegrityError`` raised by
        the insert itself is logged as a warning and is not re-raised.
    ra_col_name, dec_col_name, epoch_col_name : str
        Names of the coordinate and epoch columns in the Sources table.
    """
    record = {
        "source": source,
        ra_col_name: ra,
        dec_col_name: dec,
        "reference": reference,
        epoch_col_name: epoch,
        "equinox": equinox,
        "other_references": other_reference,
        "comments": comment,
    }
    rows = [record]
    logger.debug(f"   Data: {rows}.")

    # Insert the row; a constraint violation is reported but does not stop
    # the name ingest below.
    try:
        with db.engine.connect() as connection:
            connection.execute(db.Sources.insert().values(rows))
            connection.commit()
    except sqlalchemy.exc.IntegrityError:
        logger.warning(f"Not ingesting {source}. Not sure why. \n")
        logger.debug(f"   {rows} ")
    else:
        logger.info(f"Added {source}")
        logger.debug(f"Added {rows}")

    # The source name is always recorded in the Names table as well.
    ingest_name(db, source=source, other_name=source, raise_error=raise_error)

# Sources ingested explicitly before the catalogue loop, in this order:
# WISE J124332.17+600126.6, BD+60 1417 (ingested with search_db=False) and
# HD 2057.
for source_kwargs in (
    dict(
        source="WISE J124332.17+600126.6",
        reference="Fahe21",
        ra=190.88386,
        dec=60.023957,
    ),
    dict(
        source="BD+60 1417",
        reference="Fahe21",
        ra=190.888634,
        dec=60.01464,
        search_db=False,
    ),
    dict(
        source="HD 2057",
        reference="Reid06.891",
        ra=6.286472,
        dec=48.047403,
    ),
):
    ingest_source(db=db, ra_col_name="ra", dec_col_name="dec", **source_kwargs)
    sources_ingested += 1

# The unresolved parent systems Gl 337CD and Gl 417BC are not ingested by this
# script (their ingest_source calls existed here only as commented-out code).
# Their resolved components are ingested instead, using the same coordinates
# and references that the parent calls carried. The HD 130948 components are
# ingested the same way.
#
# Columns: source, ra, dec, reference, other_reference
for child, child_ra, child_dec, child_ref, child_other_ref in (
    ("Gl 337C", 138.0584919, 14.9956706, "GaiaEDR3", "Wils01"),
    ("Gl 337D", 138.0584919, 14.9956706, "GaiaEDR3", "Wils01"),
    ("HD 130948C", 222.566667, 23.911611, "Dupu09.729", "Pott22"),
    ("HD 130948B", 222.566667, 23.911611, "Dupu09.729", "Pott22"),
    ("Gl 417B", 168.1055653, 35.8028953, "GaiaEDR3", "Kirk00"),
    ("Gl 417C", 168.1055653, 35.8028953, "GaiaEDR3", "Kirk00"),
):
    ingest_resolved_children(
        db=db,
        source=child,
        ra=child_ra,
        dec=child_dec,
        reference=child_ref,
        other_reference=child_other_ref,
    )
    sources_ingested += 1

# Ingest the catalogue rows. For each row:
#   * no match in the database  -> ingest it as a new source;
#   * exactly one match         -> record the catalogue name as an alternative
#                                  name of the matched source;
#   * more than one match       -> only count it as already existing.
for row in calamari_table:
    dec = row['Dec']
    ra = row['RA']
    object_name = str(row['Object'])

    matches = find_source_in_db(
        db=db,
        source=object_name,
        ra=ra,
        dec=dec,
        ra_col_name="ra",
        dec_col_name="dec",
    )

    if len(matches) == 0:
        # Publications are only needed for new sources, so resolve them here
        # rather than for every row (this also avoids "publication not found"
        # warnings for rows that are never ingested).
        ref_list = otherReferencesList(row['Ref'])
        if not ref_list:
            # A source cannot be ingested without a reference; fail with a
            # message that identifies the row instead of a bare IndexError.
            raise ValueError(
                f"No publication found for {object_name} (Ref: {row['Ref']}); "
                "cannot ingest the source without a reference."
            )

        # The first publication is the primary reference; any remaining ones
        # are passed as a comma-separated other_reference.
        extra_kwargs = {}
        if len(ref_list) > 1:
            extra_kwargs["other_reference"] = ",".join(map(str, ref_list[1:]))

        ingest_source(
            db=db,
            source=object_name,
            reference=ref_list[0],
            ra=ra,
            dec=dec,
            ra_col_name="ra",
            dec_col_name="dec",
            **extra_kwargs,
        )
        sources_ingested += 1
    elif len(matches) == 1:
        ingest_name(
            db=db,
            source=matches[0],
            other_name=object_name,
        )
        names_ingested += 1
        sources_already_exists += 1
    else:
        sources_already_exists += 1

# Report the run totals; the trailing comments are the counts noted by the
# original author.
total_sources = sources_ingested + sources_already_exists
for label, count in (
    ("sources ingested", sources_ingested),  # 20 ingested
    ("sources already exists", sources_already_exists),  # 44 due to preexisting data
    ("total sources", total_sources),  # 64 sources total
    ("names ingested", names_ingested),  # 42 names ingested
):
    logger.info(f"{label}:{count}")

# WRITE THE JSON FILES
if SAVE_DB:
    db.save_database(directory="data/")