"""
Download, quality control, and generate pseudoabsence data for
ICCAT tagged species.
"""

import os
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import List

from dagster import (
    AssetKey,
    composite_solid,
    EventMetadata,
    Failure,
    Field,
    InputDefinition,
    Output,
    OutputContext,
    OutputDefinition,
    pipeline,
    repository,
    RetryRequested,
    solid,
)
from dagster.experimental import DynamicOutput, DynamicOutputDefinition
from dagster_shell.utils import execute
import pandas as pd

from facet_shared.resources import (
    S3FsSpecFilesystem,
    LocalFsMode,
    S3FsMode,
    local_preset,
    do_s3_preset,
)


# Name shared by the dynamic output of `iccat_species` and the matching
# species-code inputs of the downstream solids.
ICCAT_SPECIES_CODE = "iccat_species_code"

# Common species name -> ICCAT species code.
# Insertion order matters: `iccat_species` emits the first N values of this
# mapping when the run is limited via `number_of_species`.
ICCAT_SPECIES_CODES = {
    "Albacore": "ALB",
    "Bigeye Tuna": "BET",
    "Atlantic Bluefin Tuna": "BFT",
    "Blue shark": "BSH",
    "Atlantic Blue Marlin": "BUM",
    "Porbeagle": "POR",
    "Atlantic Sailfish": "SAI",
    "Skipjack Tuna": "SKJ",
    "Shortfin Mako": "SMA",
    "Swordfish": "SWO",
    "Atlantic White Marlin": "WHM",
    "Yellowfin Tuna": "YFT",
}

# Reverse lookup: ICCAT species code -> common species name (used for output metadata).
ICCAT_SPECIES = {v: k for k, v in ICCAT_SPECIES_CODES.items()}


@solid(
    config_schema={
        "number_of_species": Field(
            int,
            default_value=2,
            is_required=False,
            description=f"Number of species to put through the pipeline, up to {len(ICCAT_SPECIES_CODES)}",
        )
    },
    output_defs=[
        DynamicOutputDefinition(str, ICCAT_SPECIES_CODE, "ICCAT species code")
    ],
)
def iccat_species(context) -> List[str]:
    """Generate the ICCAT species codes to be used by downstream solids.

    The run can be limited to a subset of species through the
    `number_of_species` solid config value. The value is clamped to the range
    ``[0, len(ICCAT_SPECIES_CODES)]``, and species are taken in the insertion
    order of `ICCAT_SPECIES_CODES`.

    Yields:
        One `DynamicOutput` per selected species code, using the code itself
        as the mapping key.
    """

    requested = context.solid_config["number_of_species"]

    # Clamp on both sides. Without the lower bound a negative value would be
    # used as a negative slice end and silently select "all but the last N"
    # species instead of none.
    species_number = max(0, min(requested, len(ICCAT_SPECIES_CODES)))

    for species_code in list(ICCAT_SPECIES_CODES.values())[:species_number]:
        yield DynamicOutput(
            species_code, mapping_key=species_code, output_name=ICCAT_SPECIES_CODE
        )


@solid(required_resource_keys={"fs"})
def get_bathy_path(context) -> str:
    """Return the bathymetry NetCDF path matching the configured storage.

    When the `fs` resource is an `S3FsSpecFilesystem` the path is rooted at
    that filesystem's bucket; otherwise it is rooted at `/data/`.
    """

    filesystem = context.resources.fs
    root = filesystem.bucket if isinstance(filesystem, S3FsSpecFilesystem) else "/data/"
    return os.path.join(root, "bathy/global_bathy_0.01.nc")


def iccat_xls_asset_key(context: OutputContext) -> AssetKey:
    """Build the "Download" asset key for the species of the current step.

    The species is the text after the last ``[`` in the step key, with any
    leading/trailing ``]`` characters stripped.
    """
    _, _, bracketed = context.step_key.rpartition("[")
    return AssetKey(["ICCAT", "Download", bracketed.strip("]")])


@solid(
    input_defs=[InputDefinition(ICCAT_SPECIES_CODE, str, "ICCAT species code")],
    output_defs=[
        OutputDefinition(
            str,
            "iccat_xls_path",
            "Path to downloaded ICCAT xls",
            asset_key=iccat_xls_asset_key,
        )
    ],
    required_resource_keys={"fs"},
)
def iccat_download(context, iccat_species_code: str) -> str:
    """Download ICCAT data for one species code via the R download script.

    The species code is written to a temporary text file, which is handed to
    `iccat_download.r` together with the local path provided by the `fs`
    resource's `PutFile` context manager. A non-zero return code from the
    script requests a retry (up to 3 retries, 5 seconds apart).

    Yields:
        An `Output` named ``iccat_xls_path`` carrying the destination path of
        the .xlsx file, with species and source URL metadata attached.
    """

    xls_target = context.resources.fs.PutFile(f"download/_tag{iccat_species_code}.xlsx")

    with TemporaryDirectory() as scratch_dir, xls_target as local_xls:
        context.log.info("Making mock file for ICCAT download script")

        # The R script reads the species code from a file rather than an argument.
        species_file = Path(scratch_dir) / f"{iccat_species_code}.txt"
        species_file.write_text(iccat_species_code)

        rscript_cmd = f"/opt/conda/bin/Rscript /home/facet/R/iccat_download.r {species_file} {local_xls}"
        _, exit_code = execute(rscript_cmd, log=context.log, output_logging="BUFFER")
        if exit_code:
            raise RetryRequested(max_retries=3, seconds_to_wait=5)

        context.log.info(f"Downloaded file {local_xls}")

    # Built after the `with` block has exited, as in the original flow.
    destination = xls_target.dest_path
    download_metadata = {
        **xls_target.metadata(),
        "ICCAT species code": iccat_species_code,
        "Species": ICCAT_SPECIES[iccat_species_code],
        "Source URL": EventMetadata.url(
            f"https://www.iccat.int/Data/Tag/_tag{iccat_species_code}.7z"
        ),
    }
    yield Output(destination, "iccat_xls_path", metadata=download_metadata)


def iccat_qc_asset_key(context: OutputContext) -> AssetKey:
    """Build the "QC" asset key for the species of the current step.

    The species is the text after the last ``[`` in the step key, with any
    leading/trailing ``]`` characters stripped.
    """
    _, _, bracketed = context.step_key.rpartition("[")
    return AssetKey(["ICCAT", "QC", bracketed.strip("]")])


@solid(
    input_defs=[
        InputDefinition("iccat_xls_path", str, "Path to downloaded ICCAT XLS"),
        InputDefinition(ICCAT_SPECIES_CODE, str, "ICCAT species code"),
        InputDefinition("bathy_path", str, "Path to bathymetry NetCDF to use"),
    ],
    output_defs=[
        OutputDefinition(
            pd.DataFrame,
            "iccat_qc_df",
            asset_key=iccat_qc_asset_key,
            metadata={"path": "qc/{mapped_step_key}.csv"},
            io_manager_key="pandas",
        )
    ],
    required_resource_keys={"fs"},
)
def iccat_qc(
    context, iccat_xls_path: str, iccat_species_code: str, bathy_path: str
) -> str:
    """Run the R quality-control script on one species' ICCAT .xlsx file.

    The .xlsx and bathymetry files are obtained through the `fs` resource's
    `GetFile` context managers, `iccat_qc.r` writes a CSV into a temporary
    directory, and that CSV is loaded into a DataFrame.

    Yields:
        An `Output` named ``iccat_qc_df`` carrying the QC'd DataFrame, with
        species metadata attached.

    Raises:
        Failure: if the R script exits with a non-zero return code.
    """

    xls_source = context.resources.fs.GetFile(iccat_xls_path)
    bathy_source = context.resources.fs.GetFile(bathy_path)

    with xls_source as local_xls, bathy_source as local_bathy, TemporaryDirectory() as scratch_dir:

        qc_csv = Path(scratch_dir) / f"{iccat_species_code}_qc.csv"

        rscript_cmd = f"/opt/conda/bin/Rscript /home/facet/R/iccat_qc.r {local_xls} {local_bathy} {qc_csv}"
        script_output, exit_code = execute(
            rscript_cmd, log=context.log, output_logging="BUFFER"
        )

        if exit_code:
            raise Failure(
                description=f"Shell command execution failed with output: {script_output}"
            )

        context.log.info(f"QC performed on {local_xls} and output to {qc_csv}")

        # Read before leaving the block: the temporary directory is removed on exit.
        qc_frame = pd.read_csv(qc_csv)

    species_metadata = {
        "ICCAT species code": iccat_species_code,
        "Species": ICCAT_SPECIES[iccat_species_code],
    }
    yield Output(qc_frame, "iccat_qc_df", metadata=species_metadata)


@composite_solid(
    input_defs=[
        InputDefinition(ICCAT_SPECIES_CODE, str, "ICCAT species code"),
        InputDefinition("bathy_path", str, "Path to bathymetry NetCDF to use"),
    ],
    output_defs=[OutputDefinition(pd.DataFrame, "iccat_qc_df")],
    description="Download and quality control ICCAT species data ",
)
def iccat_download_and_qc(iccat_species_code: str, bathy_path: str) -> pd.DataFrame:
    """Chain `iccat_download` into `iccat_qc` for a single species code."""
    downloaded_xls_path = iccat_download(iccat_species_code)
    return iccat_qc(downloaded_xls_path, iccat_species_code, bathy_path)


@solid(
    input_defs=[
        InputDefinition(
            "iccat_qc_dfs", List[pd.DataFrame], "DataFrames of QCd ICCAT data"
        )
    ],
    output_defs=[
        OutputDefinition(
            pd.DataFrame,
            "iccat_combined",
            asset_key=AssetKey(["ICCAT", "Combined data"]),
            metadata={"path": "combined_tags.csv"},
            io_manager_key="pandas",
        )
    ],
    required_resource_keys={"fs", "pandas"},
)
def iccat_combine(context, iccat_qc_dfs: List[pd.DataFrame]) -> pd.DataFrame:
    """Combine multiple QCd ICCAT DataFrames into a single DataFrame.

    The frames are stacked row-wise in the order given, keeping each frame's
    original index values.

    Yields:
        An `Output` named ``iccat_combined`` carrying the combined DataFrame,
        with the unique ``SpeciesCode`` values listed in its metadata.

    Raises:
        Failure: if no DataFrames were supplied, since there is nothing to
            combine and downstream steps need a ``SpeciesCode`` column.
    """

    if not iccat_qc_dfs:
        # Previously this case fell through with `df = None` and crashed with
        # an opaque TypeError when building the metadata.
        raise Failure(description="No QC'd ICCAT DataFrames were provided to combine")

    # `pd.concat` replaces repeated `DataFrame.append` calls, which are
    # deprecated (and removed in pandas 2.0) and copy the data on every step.
    combined = pd.concat(iccat_qc_dfs)

    yield Output(
        combined,
        "iccat_combined",
        metadata={"Species": ", ".join(combined["SpeciesCode"].unique())},
    )


@solid(
    input_defs=[
        InputDefinition(
            "iccat_combined_df",
            pd.DataFrame,
            "Pandas DataFrame with combined data from multiple ICCAT species",
        ),
        InputDefinition("bathy_path", str, "Path to bathymetry NetCDF to use"),
    ],
    output_defs=[
        OutputDefinition(
            pd.DataFrame,
            "iccat_pseudoabs",
            asset_key=AssetKey(["ICCAT", "With pseudoabsences"]),
            metadata={"path": "with_pseudoabs.csv"},
            io_manager_key="pandas",
        )
    ],
    required_resource_keys={"fs"},
)
def iccat_pseudoabs(
    context, iccat_combined_df: pd.DataFrame, bathy_path: str
) -> pd.DataFrame:
    """Generate pseudoabsence data for the combined ICCAT data.

    The combined DataFrame is written to a temporary CSV, passed to
    `generate_pseudoabs.r` along with the bathymetry file obtained from the
    `fs` resource, and the script's CSV output is loaded into a DataFrame.

    Yields:
        An `Output` named ``iccat_pseudoabs`` carrying the resulting
        DataFrame, with the unique ``SpeciesCode`` values in its metadata.

    Raises:
        Failure: if the R script exits with a non-zero return code.
    """

    bathy_source = context.resources.fs.GetFile(bathy_path)

    with TemporaryDirectory() as scratch_dir, bathy_source as local_bathy:

        scratch = Path(scratch_dir)
        combined_csv = scratch / "iccat_combined.csv"
        result_csv = scratch / "with_pseudoabs.csv"

        iccat_combined_df.to_csv(combined_csv, index=False)

        rscript_cmd = f"""/opt/conda/bin/Rscript /home/facet/R/generate_pseudoabs.r {combined_csv} --index_var "SpeciesCode" --bathy_file "{local_bathy}" --abs_ratio "5" --force_180 {result_csv}"""
        script_output, exit_code = execute(
            rscript_cmd, log=context.log, output_logging="BUFFER"
        )

        if exit_code:
            raise Failure(
                description=f"Shell command execution failed with output: {script_output}"
            )

        # Read before leaving the block: the temporary directory is removed on exit.
        pseudoabs_frame = pd.read_csv(result_csv)

    species_listing = ", ".join(pseudoabs_frame["SpeciesCode"].unique())
    yield Output(
        pseudoabs_frame,
        "iccat_pseudoabs",
        metadata={"Species": species_listing},
    )


@pipeline(
    mode_defs=[LocalFsMode, S3FsMode],
    preset_defs=[
        local_preset("/data/dagster/iccat/"),
        do_s3_preset("dagster-test/iccat"),
    ],
)
def iccat():
    """
    Download, quality control, and generate pseudoabsence data for ICCAT tagged species.
    """

    bathy_path = get_bathy_path()

    def _download_and_qc(mapped_species):
        # Each mapped species shares the single bathymetry path output.
        return iccat_download_and_qc(mapped_species, bathy_path)

    # Fan out over the dynamic species outputs, then fan back in for combining.
    qc_frames = iccat_species().map(_download_and_qc)
    combined = iccat_combine(qc_frames.collect())

    iccat_pseudoabs(combined, bathy_path)


@repository
def iccat_repository():
    """Expose the `iccat` pipeline as the sole member of this repository."""
    pipelines = [iccat]
    return pipelines
