// #![deny(warnings)]
use calamine::{open_workbook, Reader, Xlsx};
use git2::Repository;
use git2::{Error, ErrorCode};
use indexmap::{IndexMap};
use log::{error};

use rayon::iter::{ParallelBridge, ParallelIterator};

use std::sync::{Arc, RwLock};

use std::str;
use std::time::Duration;
use structopt::StructOpt;

mod commits_metrics;
mod emails;
mod metrics;
mod repo;
mod sokrates_metrics;
mod statistics;
mod tokei_metrics;
mod utils;
use crate::repo::*;
use crate::statistics::*;
use crate::utils::*;

// Command-line options of the tool, parsed in `main` (and again in
// `analyze_test_project`) through `Args::from_args()`.
// NOTE(review): with `#[derive(StructOpt)]` the `///` comments on the fields are
// presumably used as the `--help` text, so treat their wording as user-facing.
#[derive(StructOpt)]
pub struct Args {

    // `--threads`: size of the global rayon thread pool; `main` falls back to 4 when absent.
    #[structopt(name = "threads", long)]
    /// number of threads
    flag_threads: Option<usize>,
    // `--download-emails`: `main` downloads the missing monthly mbox archives and exits.
    #[structopt(name = "download-emails", long)]
    /// download all projects' emails
    flag_download_emails: bool,
    // `--project <name>`: `main` hands the name to `analyze_test_project` and exits.
    #[structopt(name = "project", long)]
    /// only parse given project
    flag_parse_single_project: Option<String>,
    // `--list-projects`: `main` prints the lower-cased project names and exits.
    #[structopt(name = "list-projects", long)]
    /// only show projects
    flag_list_projects: bool,
    // `--skip-sokrates`: not read in this file; the whole `Args` value is passed to
    // `Stats::compute_statistics`, which presumably honours it.
    #[structopt(name = "skip-sokrates", long)]
    /// skip sokrates analysis
    flag_skip_sokrates: bool,
    // `--commit-messages`: `main` runs `commits_messages` and exits.
    #[structopt(name = "commit-messages", long)]
    /// extract commits messages
    flag_commit_messages: bool,
    // `--missing-emails`: `main` runs `check_for_missing_emails` and exits.
    #[structopt(name = "missing-emails", long)]
    /// check for any missing email archives
    flag_missing_emails: bool,

}

/// Get a list of projects from the XML file
fn list_projects() -> indexmap::IndexSet<Project> {
    let path = "../../projects-info-from-podlings-xml-extra-metadata.xlsx";
    let mut workbook: Xlsx<_> = open_workbook(path).expect("Cannot open file");
    let mut projects = indexmap::IndexSet::<Project>::new();
    // Read whole worksheet data and provide some statistics
    if let Some(Ok(range)) = workbook.worksheet_range("projects") {
        let status_col = 2;
        let start_date_col = 5;
        let end_date_col = 6;
        let github_url_col = 7;
        
        for row in range.rows() {
            let status = row[status_col].get_string().unwrap().to_string();
            let start_date = row[start_date_col].get_string().unwrap().to_string();

            if status == "graduated".to_string() || status == "retired" {
                let end_date = row[end_date_col].get_string().unwrap().to_string();
                let github_url = row[github_url_col].get_string();

                if github_url.is_some() && !github_url.unwrap().to_string().is_empty() {
                    // repo_name, repo_path, start_date, end_date
                    let repo = github_url
                        .unwrap()
                        .to_string()
                        .split("/")
                        .last()
                        .unwrap()
                        .to_string();
                    let repo_path = format!("../../projects/git/{}", repo);
                    let repo_name = repo
                        .replace("incubator-retired-", "")
                        .replace("incubator-", "")
                        .trim()
                        .to_string();
                    projects.insert(Project {
                        name: repo_name,
                        path: repo_path,
                        start_date,
                        end_date,
                        status,
                    });
                }
            }
        }
    }
    projects
}

/// Print `<repo_name>: <branch>` for the branch `HEAD` currently points to.
///
/// An unborn branch or a missing `HEAD` is printed as `HEAD (no branch)`;
/// any other error while resolving `HEAD` is returned to the caller.
fn _show_branch(repo: &Repository, repo_name: &str) -> Result<(), Error> {
    let reference = match repo.head() {
        Ok(reference) => Some(reference),
        Err(err) => match err.code() {
            ErrorCode::UnbornBranch | ErrorCode::NotFound => None,
            _ => return Err(err),
        },
    };

    let branch = reference
        .as_ref()
        .and_then(|r| r.shorthand())
        .unwrap_or("HEAD (no branch)");
    println!("{}: {}", repo_name, branch);

    Ok(())
}

/// One row of the projects spreadsheet, as produced by `list_projects`.
#[derive(PartialEq, Eq, Hash, Clone, Debug)]
struct Project {
    /// Repository name with the `incubator-retired-` / `incubator-` prefix removed.
    name: String,
    /// Location of the local git checkout (`../../projects/git/<repository>`).
    path: String,
    /// Start date, kept as the text found in the spreadsheet.
    start_date: String,
    /// End date, kept as the text found in the spreadsheet.
    end_date: String,
    /// Status read from the spreadsheet; `list_projects` only keeps
    /// `graduated` and `retired` rows.
    status: String,
}


/// Used for some manual tests. Code not stable, use with caution
fn analyze_test_project(project: String) {
    let args = Args::from_args();
    let writer = Arc::new(RwLock::new(
        csv::WriterBuilder::default()
            .has_headers(true)
            .from_path(format!("{project}.csv"))
            .unwrap(),
    ));
    let java_path = java_path();
    let projects = list_projects();
    projects.iter().filter(|x| x.name == project).for_each(|p| {
        let git_repo = Repository::open(p.path.as_str());
        if let Ok(git_repo) = git_repo {
            let repo = Repo::new(
                &git_repo,
                p.name.as_str(),
                p.start_date.as_str(),
                p.end_date.as_str(),
                p.status.as_str(),
            )
            .unwrap();
            // println!("{:?}", repo.commits.len());
            repo.checkout_master_main_trunk();
            
            let mut stats = Stats::new(p.name.as_str(), &repo, &java_path);
            let metrics = stats.compute_statistics(&args);
            if let Ok(metrics) = metrics {
                let writer = writer.clone();
                let mut guard = writer.write().expect("Unable to lock");
                for m in metrics {
                    guard.serialize(m).unwrap();
                }
            } else {
                error!("{} cannot extract the metrics", p.name.as_str());
            }
        } else {
            error!(
                "Cannot find the git repository {} at {}",
                p.name.as_str(),
                p.path.as_str()
            );
        }
    });
}

/// Remove the temporary files and folder that sokrates creates
fn remove_sokrates_temp(repo: &Repository) {
    std::fs::remove_dir_all(format!(
        "{}/_sokrates",
        repo.path().parent().unwrap().to_str().unwrap().to_string()
    ));
    std::fs::remove_file(format!(
        "{}/git-history.txt",
        repo.path().parent().unwrap().to_str().unwrap().to_string()
    ));
}

/// Check for any missing emails in the projects/emails folder
fn check_for_missing_emails() {
    let projects = list_projects();
    let emails_folder = "../../projects/emails";
    projects.iter().par_bridge().for_each(|p| {
        let git_repo = Repository::open(p.path.as_str());
        if let Ok(git_repo) = git_repo {
            let repo = Repo::new(
                &git_repo,
                p.name.as_str(),
                p.start_date.as_str(),
                p.end_date.as_str(),
                p.status.as_str(),
            );
            if let Ok(repo) = repo {
                log::info!("Checking repo {}", repo.project.to_lowercase());
                for (_key, month) in repo.dates_to_months() {
                    let path = format!(
                        "{}/{}-dev-{}.mbox",
                        emails_folder,
                        repo.project.to_lowercase(),
                        month
                    );
                    let email_path = std::path::Path::new(path.as_str());

                    if email_path.exists() && email_path.metadata().unwrap().len() == 0 {
                        log::error!(
                            "{} - email archive {}-dev-{}.mbox is empty",
                            repo.project.to_lowercase(),
                            repo.project.to_lowercase(),
                            month
                        );
                    }
                    if !email_path.exists() {
                        log::error!(
                            "{} - email archive {}-dev-{}.mbox does not exist",
                            repo.project.to_lowercase(),
                            repo.project.to_lowercase(),
                            month
                        );
                    }
                }
            }
        }
    });
}

/// Get the commits message for all incubation months for the projects
fn commits_messages(data_folder_path: &str) {
    let projects = list_projects();
    let mut writer = csv::WriterBuilder::default()
        .has_headers(true)
        .from_path(format!("{}/commit-messages.csv", data_folder_path))
        .unwrap();

    projects.iter().for_each(|p| {
        let git_repo = Repository::open(p.path.as_str());
        if let Ok(git_repo) = git_repo {
            // sometimes if we kill the program, some temp sokrates files might remain
            remove_sokrates_temp(&git_repo);
            let repo = Repo::new(
                &git_repo,
                p.name.as_str(),
                p.start_date.as_str(),
                p.end_date.as_str(),
                p.status.as_str(),
            );

            if let Ok(repo) = repo {
                let checkout = repo.checkout_master_main_trunk();
                if let Ok(_checkout) = checkout {
                    for (month, commits) in repo.inc_month_commits {
                        log::info!(
                            "{} - month: {} found {} commits",
                            p.name.as_str(),
                            month,
                            commits.len()
                        );
                        for c in commits {
                            match writer.serialize(CommitMessage {
                                project: p.name.to_string(),
                                status: p.status.to_string(),
                                inc_month: month,
                                sha: c.id().to_owned().to_string(),
                                message: c.message().unwrap_or("").to_string(),
                            }) {
                                Ok(()) => {}
                                Err(_e) => {
                                    error!("cannot serialize commit message");
                                }
                            }
                        }
                    }
                }
            }
        }
    });
}

/// Return the command used to start `java`.
///
/// * `JAVA_HOME` unset or empty: the plain command `java`.
/// * Windows: `%JAVA_HOME%\bin\java`.
/// * Linux: `$JAVA_HOME/java` as before; when that file does not exist but
///   `$JAVA_HOME/bin/java` does (the usual JDK layout), the latter is returned.
/// * Any other OS: the plain command `java`.
pub fn java_path() -> String {
    let home = match std::env::var("JAVA_HOME") {
        // An empty JAVA_HOME would otherwise produce the bogus path "/java".
        Ok(home) if !home.trim().is_empty() => home,
        _ => return "java".to_string(),
    };

    match std::env::consts::OS {
        "windows" => {
            log::info!("OS detected: Windows");
            format!("{}\\bin\\java", home)
        }
        "linux" => {
            log::info!("OS detected: Linux");
            let direct = if home.ends_with('/') {
                format!("{}java", home)
            } else {
                format!("{}/java", home)
            };
            let in_bin = format!("{}/bin/java", home.trim_end_matches('/'));
            // Keep the historical location when it works; only fall back to the
            // standard `bin/` sub-directory when that is where the binary lives.
            if !std::path::Path::new(&direct).exists() && std::path::Path::new(&in_bin).exists() {
                in_bin
            } else {
                direct
            }
        }
        _ => {
            log::info!("OS is different than Windows or Linux, defaulting to command java. If this command is not available in your system, you have to install java and make it accessible");
            "java".to_string()
        }
    }
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // **** LOGGING SETUP **** //
    let start = std::time::Instant::now();
    log4rs::init_file("log4rs.yaml", Default::default()).unwrap();
    log::info!("Booting up");

    let args = Args::from_args();

    let projects = list_projects();
    let projects = projects
        .into_iter()
        .filter(|x| {
            x.name != "ODFToolkit" || x.name != "commons-ognl" || !x.name.contains("myfaces")
        }) // ognl is now a project under commons, so hard to get only their emails
        // myfaces we have trinidad and tobago, but we cannot get emails from there, just the general myfaces emails. we remove these
        .collect::<indexmap::IndexSet<_>>();

    let data_folder_path = "data";

    std::fs::create_dir(data_folder_path);

    if args.flag_parse_single_project.is_some() {
        analyze_test_project(args.flag_parse_single_project.unwrap());
        return Ok(());
    }

    if args.flag_list_projects {
        for p in projects {
            println!("{:#?}", p.name.to_lowercase());
        }

        return Ok(());
    }

    if args.flag_missing_emails {
        check_for_missing_emails();
        let duration = start.elapsed();
        let seconds = duration.as_secs() % 60;
        let minutes = (duration.as_secs() / 60) % 60;
        let hours = (duration.as_secs() / 60) / 60;
        log::info!("Analysis completed in {}h:{}m:{}s", hours, minutes, seconds);

        return Ok(());
    }

    if args.flag_commit_messages {
        commits_messages(data_folder_path);
        let duration = start.elapsed();
        let seconds = duration.as_secs() % 60;
        let minutes = (duration.as_secs() / 60) % 60;
        let hours = (duration.as_secs() / 60) / 60;
        log::info!("Analysis completed in {}h:{}m:{}s", hours, minutes, seconds);

        return Ok(());
    }

    let threads = args.flag_threads.unwrap_or(4);
    log::info!("Using {} threads", threads);
    rayon::ThreadPoolBuilder::new()
        .num_threads(threads)
        .build_global()
        .unwrap();

    if args.flag_download_emails {
        let emails_folder = "../../projects/emails";
        
        let make_dir = std::fs::create_dir_all(emails_folder);
        if let Err(result) = make_dir{
            log::error!("Sorry, cannot create project/emails directories. Make sure you have writing access?. Original error was {}", result.to_string());
            return Ok(());
        }
        // project name, and the email name
        let projects_names_fix = IndexMap::from([
            ("apex-core", "apex"),
            ("ant-ivy", "ant"),
            ("derby", "db-derby"),
            ("empire-db", "empire"),
            // ("ftpserver", "incubator-ftpserver"),
            // ("hcatalog", "incubator-hcatalog"),
            // ("ant-ivy", "incubator-ivy"),
            // ("kalumet", "incubator-kalumet"),
            ("lucene.net", "lucenenet"),
            ("mynewt-core", "mynewt"),
            // ("npanday", "incubator-npanday"),
            // ("nuvem", "incubator-nuvem"),
            // ("odftoolkit", "incubator-odf"),
            // ("photark", "incubator-photark"),
            ("pluto", "portals-pluto"),
            ("creadur-rat", "creadur"),
            // ("s4", "incubator-s4"),
            // ("sanselan", "incubator-sanselan"),
            // ("servicecomb-java-chassis", "servicecomb"),
            // ("tashi", "incubator-tashi"),
            ("warble-server", "warble"),
            // ("wave", "incubator-wave"),
            // ("zetacomponents", "incubator-zeta"),
        ]);
        let agent = ureq::AgentBuilder::new()
            .timeout_read(Duration::from_secs(15))
            .timeout_write(Duration::from_secs(300))
            .build();
        projects.iter().par_bridge().for_each(|p| {
            let git_repo = Repository::open(p.path.as_str());
            if let Ok(git_repo) = git_repo {
                let repo = Repo::new(
                    &git_repo,
                    p.name.as_str(),
                    p.start_date.as_str(),
                    p.end_date.as_str(),
                    p.status.as_str(),
                );
                if let Ok(repo) = repo {
                    for (_key, month) in repo.dates_to_months() {
                        let path = format!(
                            "{}/{}-dev-{}.mbox",
                            emails_folder,
                            repo.project.to_lowercase(),
                            month
                        );
                        let email_path = std::path::Path::new(path.as_str());
                        if !email_path.exists() {
                            // let url = format!("https://lists.apache.org/api/mbox.lua?list=dev&domain={}.apache.org&d={}-{}",
                            let url = format!(
                                "https://mail-archives.apache.org/mod_mbox/{}-dev/{}.mbox",
                                projects_names_fix
                                    .get(repo.project.to_lowercase().as_str())
                                    .unwrap_or(&repo.project.to_lowercase().as_str()),
                                month
                            );
                            let res = agent.get(&url).call();

                            if let Ok(res) = res {
                                if res.status() == 200 {
                                    
                                    let mut file = std::fs::File::create(&path)
                                        .expect("Cannot create file {filename}");
                                    let result = std::io::copy(&mut res.into_reader(), &mut file);

                                    if result.is_err() {
                                        log::error!(
                                            "{} - cannot download email archive {}",
                                            repo.project.to_lowercase(),
                                            &month
                                        );
                                    } else {
                                        log::info!(
                                            "{} - downloaded email archive {} ",
                                            repo.project.to_lowercase(),
                                            &month
                                        );
                                    }
                                } else {
                                    log::error!(
                                        "{} - cannot download email archive {}",
                                        repo.project.to_lowercase(),
                                        &month
                                    );
                                }
                            } else {
                                log::error!(
                                    "{} - cannot download email archive {}",
                                    repo.project.to_lowercase(),
                                    &month
                                );
                            }
                        }
                    }
                }
            }
        });

        let duration = start.elapsed();
        let seconds = duration.as_secs() % 60;
        let minutes = (duration.as_secs() / 60) % 60;
        let hours = (duration.as_secs() / 60) / 60;
        log::info!("Analysis completed in {}h:{}m:{}s", hours, minutes, seconds);

        return Ok(());
    }

    log::info!("Analyzing {} projects", projects.len());
    // // **** ACTUAL LOGIC THAT CALLS FUNCTIONS TO COMPUTE THE METRICS FOR EACH PROJECT **** //
    let java_path = java_path();
    projects.iter().par_bridge().for_each(|p| {
        let git_repo = Repository::open(p.path.as_str());
        if let Ok(git_repo) = git_repo {
            let repo = Repo::new(
                &git_repo,
                p.name.as_str(),
                p.start_date.as_str(),
                p.end_date.as_str(),
                p.status.as_str(),
            );
            if let Ok(repo) = repo {
                // sometimes if we kill the program, some temp sokrates files might remain
                remove_sokrates_temp(&git_repo);
                let checkout = repo.checkout_master_main_trunk();
                if let Ok(_checkout) = checkout {
                    let mut stats = Stats::new(p.name.as_str(), &repo, &java_path);
                    let metrics = stats.compute_statistics(&args);
                    if let Ok(metrics) = metrics {
                        let mut writer = csv::WriterBuilder::default()
                            .has_headers(true)
                            .from_path(format!("{}/{}.csv", data_folder_path, p.name.as_str()))
                            .unwrap();

                        for m in metrics {
                            match writer.serialize(m) {
                                Ok(()) => {}
                                Err(_e) => {
                                    error!("{} - cannot serialize metric value", p.name.as_str());
                                }
                            }
                        }
                    } else {
                        error!("{} cannot extract the metrics", p.name.as_str());
                    }
                } else {
                    error!("{} - cannot reset to main/master/trunk", p.name.as_str());
                }
            } else {
                error!(
                    "{} - cannot parse the repository and extract commits",
                    p.name.as_str()
                );
            }
        } else {
            error!(
                "Cannot find the git repository {} at {}",
                p.name.as_str(),
                p.path.as_str()
            );
        }
    });

    let duration = start.elapsed();
    let seconds = duration.as_secs() % 60;
    let minutes = (duration.as_secs() / 60) % 60;
    let hours = (duration.as_secs() / 60) / 60;
    log::info!("Analysis completed in {}h:{}m:{}s", hours, minutes, seconds);

    Ok(())
}

#[cfg(test)]
mod test {
    use super::*;

    /// The parent of `Repository::path()` must be the working directory of the
    /// test repository (compared with forward slashes). Nothing is asserted
    /// when the test repository cannot be opened.
    #[test]
    fn test_git_path() {
        let working_dir = std::env::current_dir().unwrap();
        let expected = format!(
            "{}/test_resources/git_repo",
            working_dir.to_str().unwrap().replace("\\", "/")
        );

        if let Ok(r) = Repository::open("test_resources/git_repo") {
            let actual = r.path().parent().unwrap().to_str().unwrap_or("");
            assert_eq!(actual, expected);
        }
    }
}
