import os
import shutil
import requests
import csv
import zipfile
import glob
from tqdm import tqdm

# Destination file names that download_image treats as zip archives: after
# the download, the file is unzipped and overwritten with a .jpg found inside.
ARCHIVES = [
    "TM5287_P.Ryl.2.6.jpg",
    "TM5291_P.Ryl.2.73.jpg",
    "TM13019_P.Ryl.4.600.jpg",
    "TM5287_P.Ryl.2.69.jpg",
]

class image_downloader:
    """Download images over HTTP, unpacking the ones delivered as zip archives."""

    # Seconds to wait for the server (connect / read) before giving up, so a
    # stalled connection cannot hang the whole run.
    REQUEST_TIMEOUT = 60

    def __init__(self) -> None:
        # Scratch directory used while unpacking an archive; it is wiped
        # before and after every extraction.
        self.TMP_ARCHIVE_FOLDER = ".tmp"

    def download_image(self, srclink, dstpath):
        """Fetch ``srclink`` and store the response body at ``dstpath``.

        When the base name of ``dstpath`` is listed in ``ARCHIVES`` the
        downloaded file is a zip archive; it is then unpacked and ``dstpath``
        is overwritten with a ``.jpg`` image found inside it.

        Args:
            srclink: URL of the image (or archive) to download.
            dstpath: Path of the file to write.

        Raises:
            requests.RequestException: If the request fails, times out, or
                the server answers with a 4xx/5xx status.
            zipfile.BadZipFile: If a file listed in ``ARCHIVES`` is not a
                valid zip archive.
        """
        res = requests.get(srclink, timeout=self.REQUEST_TIMEOUT)
        # Fail loudly instead of saving an HTTP error page as an "image".
        res.raise_for_status()

        with open(dstpath, 'wb') as f:
            f.write(res.content)

        if os.path.basename(dstpath) in ARCHIVES:
            # the downloaded file is an archive
            self._replace_with_archived_image(dstpath)

    def _replace_with_archived_image(self, archive_path):
        """Overwrite the zip at ``archive_path`` with a ``.jpg`` it contains."""
        # Drop leftovers from an interrupted run so stale images can never be
        # mistaken for the content of this archive.
        shutil.rmtree(self.TMP_ARCHIVE_FOLDER, ignore_errors=True)
        try:
            with zipfile.ZipFile(archive_path, 'r') as zip_ref:
                zip_ref.extractall(self.TMP_ARCHIVE_FOLDER)

            pattern = os.path.join(self.TMP_ARCHIVE_FOLDER, "**", "*.jpg")
            # Sorted so the outcome is deterministic when an archive holds
            # several images: the last one in sorted order wins.
            images = sorted(glob.glob(pattern, recursive=True))
            if images:
                shutil.copyfile(images[-1], archive_path)
        finally:
            # Always clean up, even when extraction failed or the archive
            # was empty and the scratch folder was never created.
            shutil.rmtree(self.TMP_ARCHIVE_FOLDER, ignore_errors=True)


def main():
    """Rebuild the output folder and download every image listed in the CSV.

    The first CSV row is treated as a header and skipped. Every other
    non-blank row must hold five columns: image name, city, collection,
    inventory and link. Rows whose image name starts with ``#`` are skipped.
    """
    out_folder = "dataset__"
    csv_file = "data.csv"

    # Start from an empty output folder: previous content is discarded.
    if os.path.exists(out_folder):
        shutil.rmtree(out_folder)
    os.makedirs(out_folder)

    downloader = image_downloader()

    # newline='' is what the csv module expects so that line endings embedded
    # in quoted fields are handled correctly.
    with open(csv_file, "r", newline='') as handle:
        data = csv.reader(handle, delimiter=',', quotechar='"')
        # Skip the header row; the default keeps an empty file from raising.
        next(data, None)
        # csv.reader yields [] for blank lines, which would break the
        # five-way unpacking below, so drop them here.
        lines = [line for line in data if line]

    for line in tqdm(lines):
        img_name, city, collection, inventory, link = line
        # startswith() is safe on an empty name, unlike img_name[0].
        if img_name.startswith('#'):
            tqdm.write(f"skip {img_name[1:]}...")
        else:
            tqdm.write(f"Downloading {img_name}...")
            downloader.download_image(link, os.path.join(out_folder, img_name))

    print('Done!')


if __name__ == "__main__":
    main()


