from openpyxl import Workbook
from openpyxl import load_workbook
import sys
import os
from pathlib import Path
from subprocess import check_output, STDOUT, CalledProcessError
import concurrent.futures
from concurrent.futures import wait




# Result lists filled in by dwl(); each holds the project URLs that ended
# with the corresponding outcome.
cloned_repositories = []  # `git clone` exited successfully
already_cloned = []  # git reported an existing, non-empty destination path
failed_to_clone = []  # `git clone` failed for any other reason

def dwl(project):
    """Clone one git repository into the current working directory.

    The outcome is recorded by appending ``project`` to one of the
    module-level lists:

    * ``cloned_repositories`` -- ``git clone`` exited successfully;
    * ``already_cloned`` -- git reported that the destination path already
      exists and is not an empty directory;
    * ``failed_to_clone`` -- any other failure, including ``git`` itself
      not being runnable.

    Args:
        project: Repository URL (or path) passed verbatim to ``git clone``.
    """
    print("Cloning repository {}".format(project))

    try:
        # stderr is merged into stdout so git's error text is available on
        # the raised CalledProcessError.
        check_output(['git', 'clone', project], stderr=STDOUT)
    except CalledProcessError as exc:
        # Undecodable bytes must not abort the classification, so replace
        # them instead of raising UnicodeDecodeError.
        output = exc.output.decode("utf-8", errors="replace").strip()
        if "fatal: destination path" in output and "already exists and is not an empty directory" in output:
            already_cloned.append(project)
        else:
            failed_to_clone.append(project)
    except OSError as exc:
        # git could not be started at all (e.g. executable not found);
        # record the failure instead of letting it vanish inside a future.
        print("Could not run git for {}: {}".format(project, exc))
        failed_to_clone.append(project)
    else:
        cloned_repositories.append(project)

def main():
    """Clone every graduated or retired project listed in an xlsx workbook.

    The workbook path is taken from the single command-line argument. For
    each data row (row 1 is treated as the header) the repository URL is
    read from column H and the status from column C; rows with a URL and a
    status of "graduated" or "retired" are selected.

    The selected repositories are cloned concurrently into
    ``<parent of cwd>/projects/git`` (created if missing; the process
    changes into that directory), and the URLs that were cloned
    successfully are written to ``cloned_repos.log`` there, one per line.

    Exits with status 1 if the number of command-line arguments is wrong.
    """
    project_github_url_cell = "H"
    status_cell = "C"
    wanted_statuses = ("graduated", "retired")

    if len(sys.argv) != 2:
        print("You need to specify the input xlsx file with all the metadata")
        sys.exit(1)

    filename = sys.argv[1]
    workbook = load_workbook(filename=filename)
    sheet = workbook.active

    project_urls = []
    for row in sheet.rows:
        row_number = row[0].row
        if row_number <= 1:  # skip header
            continue
        project_url = sheet["{}{}".format(project_github_url_cell, row_number)].value
        status = sheet["{}{}".format(status_cell, row_number)].value
        if project_url is not None and status in wanted_statuses:
            project_urls.append(project_url)

    # .parent, unlike .parents[0], does not raise IndexError when the
    # current directory is the filesystem root.
    root = Path(os.getcwd()).parent
    print(root)

    # exist_ok tolerates a pre-existing directory while still surfacing
    # real errors such as missing permissions.
    target_dir = os.path.join(str(root), "projects", "git")
    os.makedirs(target_dir, exist_ok=True)
    os.chdir(target_dir)

    # Threads, not processes: dwl() reports its result by appending to
    # module-level lists, and those appends are only visible here when the
    # workers share this process's memory. The work itself is waiting on
    # `git` subprocesses, so threads still run the clones in parallel.
    with concurrent.futures.ThreadPoolExecutor(max_workers=8) as pool:
        futures = {pool.submit(dwl, url): url for url in project_urls}
    # Leaving the `with` block waits for every submitted clone to finish.

    for future, url in futures.items():
        error = future.exception()
        if error is not None:
            # Surface unexpected worker errors instead of dropping them.
            print("Unexpected error while cloning {}: {}".format(url, error))

    with open("cloned_repos.log", 'w') as f:
        for p in cloned_repositories:
            f.write("{}\n".format(p))

# Run the clone workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

    
# Disabled draft of a failure report (not executed; kept for reference):
# with open("clone_log.log", 'w') as f:
#     for p in set(failed_to_clone).difference(set(already_cloned)):
#         f.write("Failed to clone repo {}\n".format(p))






      

    
