import re

from main.lib.s3_utils import list_minio_folder # noqa

from main.models import Bucket  # noqa
from . import File


class FileCollection:
    """Snapshot of the objects returned by ``list_minio_folder`` for a bucket.

    The listing is materialised once in ``__init__`` and stored in
    ``self.collection``; every query method works on that in-memory list and
    performs no further I/O.  Each entry is expected to expose an
    ``object_name`` string attribute (the only attribute read here).
    """

    # ``YYYY-MM-DD`` date embedded in an object name.  Compiled once at class
    # creation instead of being looked up on every loop iteration.
    _DATE_PATTERN = re.compile(r'\d{4}-\d{2}-\d{2}')

    def __init__(self, bucket_name: str, sub_path: str = None):
        """Load the listing for ``bucket_name`` (optionally under ``sub_path``).

        Args:
            bucket_name: Name of the bucket, forwarded to ``list_minio_folder``.
            sub_path: Optional folder/prefix, forwarded unchanged (``None`` by
                default).  NOTE(review): how ``None`` is interpreted is up to
                ``list_minio_folder`` -- presumably "whole bucket"; confirm.
        """
        # list() drains whatever iterable the helper returns so the collection
        # can be traversed repeatedly by the methods below.
        self.collection = list(list_minio_folder(bucket_name, sub_path))

    def contains(self, file: File) -> bool:
        """Return True if an entry's ``object_name`` equals ``file.relative_path``."""
        target = file.relative_path
        return any(s3_file.object_name == target for s3_file in self.collection)

    def get_missing_dates(self, necessary_dates: list[str]) -> list[str]:
        """Return the dates that do not occur in any object name.

        A date counts as present when it is a substring of at least one
        ``object_name``.  Order of ``necessary_dates`` is preserved.

        Args:
            necessary_dates: Date strings that should be present.

        Returns:
            The subset of ``necessary_dates`` not found in the collection.
            When the collection is empty the ``necessary_dates`` argument
            itself is returned (not a copy).
        """
        if not self.collection:
            return necessary_dates

        return [
            date
            for date in necessary_dates
            if not any(date in s3_file.object_name for s3_file in self.collection)
        ]

    def get_unnecessary_dates(self, necessary_dates: list[str]) -> list[str]:
        """Return the dates of stored objects that match no necessary date.

        For every entry whose ``object_name`` contains none of
        ``necessary_dates`` (substring test), the first ``YYYY-MM-DD`` match in
        the name is collected.  Entries without such a date are skipped, and
        the same date may appear more than once if several entries carry it.

        Args:
            necessary_dates: Date strings that are expected to be present.

        Returns:
            Extracted date strings, in collection order.
        """
        result = []
        for s3_file in self.collection:
            name = s3_file.object_name
            if any(date in name for date in necessary_dates):
                continue  # this object belongs to a required date
            match = self._DATE_PATTERN.search(name)
            if match:
                result.append(match.group(0))
        return result

    def are_all_with_extension(self, extension: str) -> bool:
        """Return True if every ``object_name`` ends with ``extension``.

        An empty collection yields True.
        """
        return all(
            s3_file.object_name.endswith(extension) for s3_file in self.collection
        )

    def extract_date_list(self, bucket: Bucket):
        """Collect the dates of all entries, requiring one shared base name.

        Each entry is wrapped with ``File.create_by_s3_obj(bucket, entry)``;
        entries for which ``find_date(True)`` returns a falsy value are
        ignored.

        Args:
            bucket: Bucket passed through to ``File.create_by_s3_obj``.

        Returns:
            The list of dates found (collection order), or ``None`` when fewer
            than two dated entries exist or when the dated entries do not all
            share the same ``get_base_name()`` value.
        """
        dates = []
        name_bases = set()

        for s3_file in self.collection:
            file = File.create_by_s3_obj(bucket, s3_file)
            date = file.find_date(True)
            if date:
                dates.append(date)
                # If all dated files follow the same naming structure, this
                # set ends up with exactly one element.
                name_bases.add(file.get_base_name())

        # Fewer than two dated files, or inconsistent naming among them.
        if len(dates) < 2 or len(name_bases) > 1:
            return None

        return dates
