import datetime
import pytz
import re

import xarray as xr     # noqa
import pandas as pd # noqa
from main.models import Bucket # noqa

from .._api import GeoServerApi

# Regular expressions that recognise a date inside a file name, each paired
# with the ``strptime`` formats to try (in the listed order) on a match.
# Order matters: ``File.find_date`` walks this list top to bottom and returns
# the first candidate that parses, so earlier entries take precedence.
DATE_PATTERNS = [
    # Eight consecutive digits: YYYYMMDD first, then DDMMYYYY.
    (r"\d{8}", ["%Y%m%d", "%d%m%Y"]),
    # Six consecutive digits: YYYYMM first, then MMYYYY.
    (r"\d{6}", ["%Y%m", "%m%Y"]),
    # ISO-like date with dashes, year first.
    (r"\d{4}-\d{2}-\d{2}", ["%Y-%m-%d"]),
    # Same layout with underscores, year first.
    (r"\d{4}_\d{2}_\d{2}", ["%Y_%m_%d"]),
    # Day-first date with dashes.
    (r"\d{2}-\d{2}-\d{4}", ["%d-%m-%Y"]),
    # Day-first date with underscores.
    (r"\d{2}_\d{2}_\d{4}", ["%d_%m_%Y"]),
    # Four consecutive digits: a bare year.
    (r"\d{4}", ["%Y"]),
]


class File:
    """A single object inside a bucket, addressed by its bucket-relative path.

    Besides the path, an instance carries the (optional) size and
    last-modified timestamp, plus NetCDF metadata (``netcdf_variables`` and
    ``netcdf_dates``) that stays empty until ``read_netcdf_data`` is called.
    """

    # Maximum length of a generated store name.
    # NOTE(review): presumably a backend identifier limit -- confirm.
    _MAX_STORE_NAME_LENGTH = 63

    # Path fragments that are dropped when building a store name.
    _IGNORED_NAME_PARTS = frozenset({"nc", "tif", "shp", "zip", "cog"})

    # Data variables that are not recorded when reading a NetCDF file.
    _SKIPPED_NETCDF_VARIABLES = ('easting', 'northing', 'time_bnds')

    def __init__(self, bucket: Bucket, relative_path: str, file_size: int = None, last_modified=None):
        """Create a file reference.

        Args:
            bucket: Bucket the file belongs to; its ``name`` prefixes the
                absolute path.
            relative_path: Path of the file relative to the bucket.
            file_size: Size of the file, or ``None`` if unknown.
            last_modified: Timestamp of the last modification, or ``None``
                if unknown.
        """
        self.bucket: Bucket = bucket
        self.relative_path: str = relative_path
        self.absolute_path: str = f"{bucket.name}/{self.relative_path}"
        self.size = file_size
        self.last_modified = last_modified

        self.netcdf_variables = []
        self.netcdf_dates = []

    def is_netcdf(self) -> bool:
        """Return True if the relative path ends with ``.nc``."""
        return self.relative_path.endswith('.nc')

    def is_geotiff(self) -> bool:
        """Return True for ``.tif`` files, but not for ``*.tif`` wildcard paths.

        The wildcard form is what ``get_imagemosaic_path`` produces, so such
        a path is not treated as an individual GeoTIFF.
        """
        path = self.relative_path
        return path.endswith('.tif') and not path.endswith('*.tif')

    def get_path(self):
        """Return the directory part of the relative path ('' at bucket root)."""
        return self.relative_path.rpartition("/")[0]

    def is_csv(self) -> bool:
        """Return True if the relative path ends with ``.csv``."""
        return self.relative_path.endswith('.csv')

    def is_wms_file(self):
        """Return True if the file qualifies for WMS according to the bucket.

        Without a configured ``wms_file_suffix`` every file qualifies.
        Otherwise the path must end (case-insensitively) with
        ``<suffix>.tif`` or ``<suffix>.nc``.
        """
        suffix = self.bucket.wms_file_suffix
        if not suffix:
            return True

        suffix = suffix.lower()
        endings = (f"{suffix}.tif", f"{suffix}.nc")
        return self.relative_path.lower().endswith(endings)

    def is_shapefile_for_import(self):
        """Return True if the file is a zipped shapefile selected for import.

        The path must end (case-insensitively) with ``<shp_file_suffix>.zip``
        or, when the bucket defines no suffix, simply with ``.zip``.
        """
        suffix = self.bucket.shp_file_suffix
        ending = f"{suffix.lower()}.zip" if suffix else ".zip"
        return self.relative_path.lower().endswith(ending)

    @staticmethod
    def create_by_absolute_path(b: Bucket, absolute_path: str):
        """Build a File from a ``<bucket name>/<relative path>`` string.

        The bucket-name prefix is stripped if present; the size is set to 0
        and no last-modified timestamp is recorded.
        """
        relative_path = absolute_path.removeprefix(b.name + "/")
        return File(b, relative_path, 0)

    @staticmethod
    def create_by_s3_obj(b: Bucket, s3_file):
        """Build a File from an object listing entry.

        ``s3_file`` must expose ``object_name``, ``size`` and
        ``last_modified`` attributes.
        """
        return File(b, s3_file.object_name, s3_file.size, s3_file.last_modified)

    def is_newer_than(self, date: datetime.datetime) -> bool:
        """Return True if the file was modified after ``date``.

        ``date`` is converted to UTC before comparing. The comparison raises
        ``TypeError`` if ``last_modified`` is ``None`` or a naive datetime.
        """
        return self.last_modified > date.astimezone(pytz.utc)

    def read_netcdf_data(self, fs):
        """Populate ``netcdf_variables`` and ``netcdf_dates`` from the file.

        Does nothing for non-NetCDF files. Both attributes are reset before
        reading so a failed re-read never leaves stale dates next to fresh
        variables. A missing ``time`` entry or an unreadable file is reported
        on stdout and otherwise ignored.

        Args:
            fs: Object whose ``open(path, mode)`` returns a context manager
                yielding a binary file object (presumably an fsspec-style
                filesystem -- confirm against callers).
        """
        if not self.is_netcdf():
            return

        self.netcdf_variables = []
        self.netcdf_dates = []

        with fs.open(self.absolute_path, 'rb') as netcdf_file:
            try:
                # The dataset is a context manager; closing it releases the
                # underlying handle instead of leaking it.
                with xr.open_dataset(netcdf_file, engine='h5netcdf') as ds:
                    for name in ds.data_vars:
                        # avoid unnecessary records
                        if name in self._SKIPPED_NETCDF_VARIABLES:
                            continue
                        self.netcdf_variables.append(name)

                    time_values = pd.to_datetime(ds['time'].values)
                    # Unique calendar days, ISO formatted, in ascending order.
                    self.netcdf_dates = sorted({t.date().isoformat() for t in time_values})
            except KeyError:
                print("File '{}' has no dimension 'time'.".format(self.absolute_path))
            except (TypeError, ValueError):
                print("Error: File '{}' cannot be read.".format(self.absolute_path))

    def get_filename(self):
        """Return the last component of the relative path."""
        return self.relative_path.rpartition("/")[2]

    def get_base_name(self):
        """Return the file name with every date-like run replaced by ``<DATE>``.

        The patterns of ``DATE_PATTERNS`` are substituted one after another,
        each on the result of the previous substitution.
        """
        base_name = self.get_filename()
        for pattern, _ in DATE_PATTERNS:
            base_name = re.sub(pattern, "<DATE>", base_name)
        return base_name

    def find_date(self, formatted_output=False):
        """Find the first parseable date in the file name.

        Patterns are tried in ``DATE_PATTERNS`` order; for each match every
        associated format is tried, and the first successful parse wins. A
        bare four-digit year only counts if it lies between 1900 and the
        current year plus 100.

        Args:
            formatted_output: If True return the date as ``YYYY-MM-DD``,
                otherwise return the matched substring unchanged.

        Returns:
            The date string, or ``None`` if nothing in the name parses.
        """
        latest_year = datetime.datetime.now().year + 100
        filename = self.get_filename()

        for pattern, formats in DATE_PATTERNS:
            for date_str in re.findall(pattern, filename):
                for fmt in formats:
                    try:
                        date = datetime.datetime.strptime(date_str, fmt)
                    except ValueError:
                        continue
                    # Reject four-digit numbers that are unlikely to be years.
                    if fmt == "%Y" and not (1900 <= int(date_str) <= latest_year):
                        continue
                    return date.strftime("%Y-%m-%d") if formatted_output else date_str
        return None

    def get_imagemosaic_path(self):
        """Return the ``<bucket>/<directory>/*.tif`` wildcard for this file.

        NOTE(review): for a file at the bucket root the directory is empty
        and the result contains a double slash (``<bucket>//*.tif``).
        """
        return self.bucket.name + "/" + self.get_path() + "/*.tif"

    def get_unique_store_name(self, api: GeoServerApi, variable: str = None):
        """Derive a store name from the path that is not yet used on the server.

        The lower-cased relative path (minus the date found in the file
        name) is split on ``.``, ``-``, ``/`` and ``_``; duplicate and
        file-type fragments are dropped. If the underscore-joined result is
        too long, a compact CamelCase form is built instead.

        Args:
            api: Client used to check for existing stores.
            variable: Optional variable name appended to the name parts so
                that several variables of one file get distinct names.

        Returns:
            A store name of at most 63 characters.

        Raises:
            ValueError: If no free name is found (see
                ``try_name_against_api``).
        """
        relative_path = self.relative_path.lower()

        # Remove detected date before replacing characters/splitting
        date_str = self.find_date()
        if date_str:
            relative_path = relative_path.replace(date_str, "")

        for char in (".", "-", "/"):
            relative_path = relative_path.replace(char, "_")

        parts = [p for p in relative_path.split("_") if p]

        # append variable name even if it might be in parts (to get a
        # difference if a file contains multiple variables)
        if variable:
            parts = parts + variable.lower().split("_")

        # Keep the first occurrence of every fragment, in original order.
        unique_parts = list(dict.fromkeys(
            part for part in parts if part not in self._IGNORED_NAME_PARTS
        ))

        name = "_".join(unique_parts)
        if len(name) <= self._MAX_STORE_NAME_LENGTH:
            return self.try_name_against_api(api, name, 0)

        # Too long: cut every fragment to 8 characters, drop fragments
        # shorter than 3 and join them CamelCase-style without underscores.
        base_name_full = ''.join(
            part[:8].capitalize() for part in unique_parts if len(part) >= 3
        )
        return self.try_name_against_api(api, base_name_full, 0)

    def try_name_against_api(self, api: GeoServerApi, base_name_full: str, counter: int):
        """Return the first variant of ``base_name_full`` not present on the server.

        Attempt 0 uses the name as is; attempt ``n`` (1..25) appends
        ``_<letter>`` with ``letter = chr(65 + n)``, so the suffixes run from
        ``_B`` to ``_Z``. The base name is shortened where needed so that the
        result never exceeds 63 characters.
        NOTE(review): ``_A`` is never produced -- confirm this is intended.

        NetCDF and GeoTIFF files are checked against coverage stores,
        shapefile archives against data stores, both within the workspace
        named after the bucket.

        Args:
            api: Client used to check for existing stores.
            base_name_full: Candidate name without suffix.
            counter: Attempt number to start from.

        Raises:
            ValueError: If ``counter`` exceeds 25, i.e. every suffix is taken
                or the file is of a type for which no store check applies.
        """
        # The file type does not change between attempts; decide once.
        check_coverage = self.is_netcdf() or self.is_geotiff()
        check_datastore = self.is_shapefile_for_import()
        workspace = self.bucket.name

        while True:
            if counter == 0:
                suffix = ''
            elif counter <= 25:
                suffix = f"_{chr(65 + counter)}"
            else:
                raise ValueError("Too many attempts, no available suffix")

            result = base_name_full[:self._MAX_STORE_NAME_LENGTH - len(suffix)] + suffix

            if check_coverage and not api.is_coveragestore_existing(workspace, result):
                return result

            if check_datastore and not api.is_datastore_existing(workspace, result):
                return result

            counter += 1
