
import { parse } from "csv-parse";

import fs from 'fs-extra';

// const INPUT_CSV = 'wfgh3.csv';
// const OUTPUT_FOLDER = 'output/wfgh3/';

// Source CSV with one row per commit.
const INPUT_CSV = "oas.commits.metrics.clean.csv";
// Destination CSV for the rows that survive filtering.
const OUTPUT_CSV = "oas.commits.metrics.clean.filter5.csv";

// Per-API commit-count threshold used when filtering the output.
const MIN_COMMITS = 5;

// const INPUT_CSV = '/Volumes/T9/apis/apis.commits.csv';

// "repository:file_path" key -> number of commits seen; filled in by cleanup().
const api_spec_ids = {};
// Numeric id -> "repository:file_path" key; filled in by cleanup().
const reverse_api_spec_ids = [];

/**
 * Drops commits with unusable dates and gives every spec file a numeric id.
 *
 * Steps:
 *  1. Reject rows whose `commit_date` is missing, unparseable, or whose year
 *     falls outside 2010..2025. Each rejected value is logged, followed by a
 *     per-year histogram of everything that was inspected.
 *  2. For rows that already carry an `api_spec_id`, copy it into `file_path`
 *     and build `repository` from `owner` and `repo_name`; rows without a
 *     `file_path` fall back to the second "_"-separated segment of `name`.
 *  3. Count commits per "repository:file_path" key in the module-level
 *     `api_spec_ids`, then overwrite each row's `api_spec_id` with the index
 *     of its key and record the reverse lookup in `reverse_api_spec_ids`.
 *  4. Log a histogram of the `methods_count` values.
 *
 * Side effects: the row objects are mutated in place and the module-level
 * `api_spec_ids` / `reverse_api_spec_ids` are updated.
 *
 * @param {Object[]} data Parsed CSV rows.
 * @returns {Object[]} The rows that passed the date filter (same objects).
 */
function cleanup(data) {

    const years = {};

    data = data.filter(d => {
        const commit_date = dayjs(d.commit_date);
        // For inputs holding unix timestamps use dayjs.unix(parseInt(d.commit_date)) instead.

        // An unparseable value yields an invalid date whose year() is NaN, and
        // NaN fails both range comparisons below, so validity must be checked
        // explicitly. dayjs(undefined) would silently mean "now", hence the
        // separate null/undefined guard.
        const valid = d.commit_date != null && commit_date.isValid();
        const year = valid ? commit_date.year() : NaN;

        years[year] = (years[year] || 0) + 1;

        if (!valid || year >= 2026 || year < 2010) {
            console.log("Bad date", d.commit_date);
            return false;
        }
        return true;
    });

    console.log(years);

    // Map repository + file_path to a unique id.
    data.forEach(d => {

        if (d.api_spec_id !== undefined) {
            d.file_path = d.api_spec_id;
            d.repository = d.owner + ":" + d.repo_name;
        }

        // PATCH for missing file_path: take it from the row's name.
        if (d.file_path === undefined) {
            d.file_path = d.name.split("_")[1];
        }

        const key = d.repository + ":" + d.file_path;
        api_spec_ids[key] = (api_spec_ids[key] || 0) + 1;
    });

    // Index every known key; the index becomes the numeric api_spec_id.
    const api_spec_ids_keys = new Map();

    Object.keys(api_spec_ids).forEach((k, i) => {
        api_spec_ids_keys.set(k, i);
        reverse_api_spec_ids[i] = k;
    });

    data.forEach(d => {
        d.api_spec_id = api_spec_ids_keys.get(d.repository + ":" + d.file_path);
    });

    const h_methods_count = {};
    data.forEach(d => {
        h_methods_count[d.methods_count] = (h_methods_count[d.methods_count] || 0) + 1;
    });

    console.log("Methods count", h_methods_count);

    return data;
}

// Parse the whole CSV into memory, clean and normalize it, then write out only
// the commits of APIs whose commit count exceeds MIN_COMMITS.
const parser = parse({ columns: true,
    skip_empty_lines: true}, function (err, csvdata) {

    // On a parse failure csvdata is not usable; report it and stop instead of
    // crashing on csvdata.length below.
    if (err) {
        console.error("Failed to parse " + INPUT_CSV, err);
        process.exitCode = 1;
        return;
    }

    console.log("Commits: ", csvdata.length);

    const cleandata = cleanup(csvdata);

    const {api_stats, data} = normalize(cleandata);

    // NOTE(review): strictly greater-than, so an API with exactly MIN_COMMITS
    // commits is dropped — confirm that ">=" is not what was intended.
    const filtered_data = data.filter(d => api_stats[d.api_spec_id].count > MIN_COMMITS);

    // Header line followed by one line per kept commit.
    const records = ['api_spec_id,commit_date,methods_count'].concat(
        filtered_data.map(d => [d.api_spec_id, d.commit_date, d.methods_count].join(','))
    );

    // Report the number of data rows; the header line is not a commit.
    console.log("Filtered "+MIN_COMMITS, filtered_data.length);

    fs.writeFileSync(OUTPUT_CSV, records.join('\n'));

});

fs.createReadStream(INPUT_CSV).pipe(parser);

import dayjs from 'dayjs';
import minMax from 'dayjs/plugin/minMax.js';

// Register the minMax plugin: normalize() below calls dayjs.min() / dayjs.max().
dayjs.extend(minMax);

/**
 * Annotates every commit with age information and aggregates per-API stats.
 *
 * Each row of `data` is mutated in place and gains:
 *  - `commit_age`: days since the first commit of the same API;
 *  - `commit_age_reverse`: days until the last commit of the same API;
 *  - `normalized_commit_age`: `commit_age` divided by the API's age
 *    (0 when the API's first and last commit coincide).
 *
 * `api_stats[api_spec_id]` holds `first_commit`, `last_commit`, `count`,
 * `ndatesum` (sum of the commits' normalized dates), `methods_count` (sum over
 * the API's commits), `age` (days between first and last commit),
 * `mid_commit`, `methods_count_first`, `methods_count_last` and
 * `delta_methods_count` (last minus first).
 *
 * @param {Object[]} data Rows carrying `api_spec_id`, `commit_date` and
 *     `methods_count`.
 * @returns {{data: Object[], apis: number[], api_stats: Object, ndates: number[], dates: Object[]}}
 *     `dates` are the parsed dayjs dates and `ndates` their position within
 *     the overall date range (0 = earliest, 1 = latest), both index-aligned
 *     with `data`; `apis` lists the distinct API ids.
 */
function normalize(data) {

    // No commits: there is no earliest/latest date to measure against, so
    // return empty results instead of failing on the missing dates.
    if (data.length === 0) {
        return {data, apis: [], api_stats: {}, ndates: [], dates: []};
    }

    // Empty / missing methods_count counts as 0; always parse as base 10.
    const toCount = v => parseInt(v || "0", 10);

    // For inputs holding unix timestamps use dayjs.unix(d.commit_date) instead.
    const dates = data.map(d => dayjs(d.commit_date));

    const minDate = dayjs.min(dates);
    const maxDate = dayjs.max(dates);

    console.log("Dates:", minDate, maxDate);

    const range = maxDate.diff(minDate, 'day', true);

    const diff = dates.map(d => d.diff(minDate, 'day', true));

    // When every commit shares one timestamp the range is 0; use position 0
    // rather than 0/0 = NaN, matching how normalized_commit_age treats age 0.
    const ndates = diff.map(d => (range === 0 ? 0 : d / range));

    console.log("Date range:", range);

    const apis = Array.from(new Set(data.map(d => parseInt(d.api_spec_id, 10))));

    console.log("APIs", apis.length);

    const api_stats = {};

    // First pass: per-API extremes, commit count and running sums.
    data.forEach((d, i) => {

        const s = api_stats[d.api_spec_id];

        if (s === undefined) {
            api_stats[d.api_spec_id] = {first_commit: dates[i], last_commit: dates[i], count: 1, ndatesum: ndates[i], methods_count: toCount(d.methods_count) };
        } else {
            s.first_commit = dayjs.min([s.first_commit, dates[i]]);
            s.last_commit = dayjs.max([s.last_commit, dates[i]]);
            s.count++;
            s.ndatesum += ndates[i];
            s.methods_count += toCount(d.methods_count);
        }

    });

    Object.keys(api_stats).forEach(k => {

        const s = api_stats[k];
        s.age = s.last_commit.diff(s.first_commit, 'day', true);
        s.mid_commit = s.first_commit.add(s.age / 2, "day");

    });

    // Second pass: per-commit ages, plus the methods count at each API's
    // first and last commit.
    data.forEach((d, i) => {

        const s = api_stats[d.api_spec_id];

        d.commit_age = dates[i].diff(s.first_commit, 'day', true);
        d.commit_age_reverse = -dates[i].diff(s.last_commit, 'day', true);

        if (s.age === 0) {
            d.normalized_commit_age = 0;
        } else {
            d.normalized_commit_age = d.commit_age / s.age;
        }

        // Identity comparison on purpose: first_commit / last_commit are
        // expected to be the very objects stored in `dates` (assumes
        // dayjs.min/max return one of the instances passed in — confirm),
        // so exactly one row matches each.
        if (dates[i] === s.first_commit) {
            s.methods_count_first = toCount(d.methods_count);
        }

        if (dates[i] === s.last_commit) {
            s.methods_count_last = toCount(d.methods_count);
        }

    });

    Object.keys(api_stats).forEach(k => {

        api_stats[k].delta_methods_count = api_stats[k].methods_count_last - api_stats[k].methods_count_first;

    });

    return {data, apis, api_stats, ndates, dates};

}
