#!/usr/bin/env python

import argparse
import itertools
import pathlib
from typing import Optional

import pandas as pd

from facet_shared.logger import logger


class ParsedArguments(argparse.Namespace):
    """Typed namespace describing the command line options of this script.

    The attributes mirror the arguments registered in ``parse_arguments``.
    """

    # Path of the input CSV file to split.
    input_csv: pathlib.Path
    # Directory that receives the per-date CSV files.
    output_dir: pathlib.Path
    # Maximum number of dates to write; None means no limit.
    limit: Optional[int]
    # Optional lower bound on the dates to write; None means no extra filter.
    start_date: Optional[str]


def main():
    """Split a given CSV file into one CSV file per date.

    Reads the input CSV named on the command line (parsing its ``date``
    column as datetimes), keeps only the rows whose date is strictly greater
    than 1992-10-02 and, when ``--start_date`` is given, greater than or
    equal to that date. The remaining rows are grouped by calendar date and
    each group is written to ``<output_dir>/<date>.csv`` without the index.

    When ``--limit N`` is given, only the first ``N`` date groups yielded by
    ``groupby`` are written. The output directory is created if it does not
    exist yet.
    """
    args = parse_arguments()

    df = pd.read_csv(args.input_csv, parse_dates=["date"])

    # Filtering dates to match range of hycom: keep rows strictly after the
    # cutoff below.
    df = df["1992-10-02" < df["date"]]

    if args.start_date:
        logger.info(f"Filtering split files to dates after {args.start_date}")
        df = df[args.start_date <= df["date"]]

    # End of date filtering.

    # Compare against None explicitly so that ``--limit 0`` is reported as a
    # limit (and writes nothing) instead of being logged as "all dates".
    limit = args.limit
    if limit is not None:
        logger.info(f"Limiting to {limit} dates")
    else:
        logger.info("Generating all dates")

    # Make sure the destination exists; otherwise the first to_csv call
    # fails when the directory has not been created beforehand.
    args.output_dir.mkdir(parents=True, exist_ok=True)

    # Group on the calendar date (dropping any time-of-day component) and
    # stop after ``limit`` groups; islice treats None as "no limit".
    groups_by_date = df.groupby(df["date"].dt.date)
    for date, date_df in itertools.islice(groups_by_date, limit):
        date_string = str(date)
        date_path = args.output_dir / f"{date_string}.csv"
        date_df.to_csv(date_path, index=False)


def parse_arguments() -> ParsedArguments:
    """Parse the user's command line input.

    Returns:
        A ``ParsedArguments`` instance whose ``input_csv``, ``output_dir``,
        ``limit`` and ``start_date`` attributes are filled in from the
        command line; ``limit`` and ``start_date`` are ``None`` when omitted.

    Raises:
        SystemExit: Raised by argparse when the arguments are invalid,
            including a negative ``--limit``.
    """
    parser = argparse.ArgumentParser(
        description="Split up a source ICCAT CSV file by date"
    )
    parser.add_argument("input_csv", type=pathlib.Path, help="Input ICCAT CSV")
    parser.add_argument("output_dir", type=pathlib.Path, help="Output directory")
    parser.add_argument(
        "--limit",
        type=int,
        default=None,
        help="Limit output to a subset of input dates",
    )
    parser.add_argument(
        "--start_date", type=str, default=None, help="Start at a specific date"
    )

    # Fill a ParsedArguments instance so the returned object really has the
    # annotated type rather than being a plain argparse.Namespace.
    args = parser.parse_args(namespace=ParsedArguments())

    # A negative limit would otherwise only surface later as a ValueError
    # from itertools.islice; reject it here with a proper usage error.
    if args.limit is not None and args.limit < 0:
        parser.error("--limit must be a non-negative integer")

    return args


# Run the split only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
