#!/usr/bin/env python
"""
combine.py

Given a source directory, find all the CSVs in it and its subdirectories,
and combine them into a single file

Usage:
    combine.py /pfs/extract/ /pfs/out/ctag.csv
"""
import argparse
import pathlib

import pandas as pd

from facet_shared.logger import logger


def main():
    """
    Collect CSVs from a directory and all its subdirectories and combine into a single file

    Every ``*.csv`` found under the input directory is read with pandas, the
    resulting frames are concatenated row-wise, and the result is written to the
    output path without the index column.

    Raises:
        FileNotFoundError: if no CSV file is found under the input directory.
    """
    args = parse_arguments()

    # Gather the frames first and concatenate once: DataFrame.append was removed
    # in pandas 2.0, and appending inside the loop re-copies all previous rows
    # on every iteration.
    frames = []

    # Sort the paths so the row order of the output does not depend on the
    # filesystem's directory listing order.
    for csv_path in sorted(args.input_dir.glob("**/*.csv")):
        logger.info(f"Appending {csv_path}")
        frames.append(pd.read_csv(csv_path))

    if not frames:
        # Fail with an explicit message instead of an AttributeError on None.
        raise FileNotFoundError(f"No CSV files found under {args.input_dir}")

    df = pd.concat(frames)

    logger.info(f"Writing combined CSVs to {args.output_csv}")
    df.to_csv(args.output_csv, index=False)


class Arguments(argparse.Namespace):
    """
    Typed namespace describing the command-line arguments of this script
    """

    # Input directory of CSV files
    input_dir: pathlib.Path
    # Path to output CSV
    output_csv: pathlib.Path


def parse_arguments() -> Arguments:
    """
    Parse the command-line arguments of the script

    Returns:
        An ``Arguments`` instance whose ``input_dir`` and ``output_csv``
        attributes are ``pathlib.Path`` objects built from the two positional
        arguments.
    """
    parser = argparse.ArgumentParser(
        description="Combine a directory and it's subdirectories of CSVs into a single file"
    )
    parser.add_argument(
        "input_dir", help="Input directory of CSV files", type=pathlib.Path
    )
    parser.add_argument("output_csv", help="Path to output CSV", type=pathlib.Path)

    # Fill an Arguments instance so the returned object really is the annotated
    # type; without namespace= argparse returns a plain argparse.Namespace.
    return parser.parse_args(namespace=Arguments())


# Run the combine step only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
