const simpleGit = require('simple-git');
const { execFileSync, execSync } = require('child_process');
const fs = require('fs');
const createCsvWriter = require('csv-writer').createObjectCsvWriter;

// Path of the Git repository to analyse. Defaults to the original checkout;
// pass a different path as the first command-line argument to analyse another
// repository without editing this file.
const REPO = process.argv[2] || '/Users/pautasso/GIT/usi/school/2025-USI-MSDE-SA/2025-git/repos/sa-GauthamG2';

// simple-git client bound to REPO; used to read the commit log.
const git = simpleGit(REPO);

// CSV writer for the output file, with timestamp, file and size columns.
const csvWriter = createCsvWriter({
  path: 'commit_file_sizes.csv',
  header: [
    { id: 'timestamp', title: 'Timestamp' },
    { id: 'file', title: 'File' },
    { id: 'size', title: 'Size (bytes)' }
  ]
});

/**
 * Reads the repository history through the simple-git client.
 *
 * @returns {Promise<Array<Object>>} One object per log entry, in the order
 *   reported by `git.log()`, carrying only that entry's `hash` and `date`.
 */
async function getAllCommits() {
  const { all: entries } = await git.log();
  const commits = [];
  for (const { hash, date } of entries) {
    commits.push({ hash, date });
  }
  return commits;
}

/**
 * Lists the paths touched by a commit.
 *
 * Runs `git diff-tree` with REPO as the working directory and returns the
 * changed paths it reports.
 *
 * @param {string} commitHash - Commit to inspect.
 * @returns {string[]} Changed file paths; an empty array when git fails
 *   (the failure is logged to stderr).
 */
function getFileListAtCommit(commitHash) {
  try {
    // execFileSync with `cwd` bypasses the shell, so a REPO path containing
    // spaces or shell metacharacters cannot break or alter the command.
    const output = execFileSync(
      'git',
      [
        'diff-tree',
        '--no-commit-id',
        '--name-only',
        '-r',
        // Without --root the initial commit has no parent to diff against and
        // git prints nothing, dropping every file added by the first commit.
        '--root',
        // NUL-separated output keeps unusual file names verbatim instead of
        // the quoted/escaped form git may use in newline-separated mode.
        '-z',
        commitHash
      ],
      { cwd: REPO }
    ).toString();
    return output.split('\0').filter(name => name !== '');
  } catch (err) {
    console.error(`Failed to get file list at ${commitHash}: ${err}`);
    return [];
  }
}

/**
 * Looks up the size of a file as stored in a given commit.
 *
 * @param {string} commitHash - Commit to read from.
 * @param {string} filePath - Path of the file inside the repository.
 * @returns {number|null} Size in bytes as reported by `git cat-file -s`, or
 *   null when git cannot resolve `<commit>:<path>` (e.g. the file was deleted
 *   by that commit) or prints something that is not a number.
 */
function getFileSizeAtCommit(commitHash, filePath) {
  try {
    // File names come from repository content, so they must never reach a
    // shell: execFileSync passes the object name as a single argv entry.
    const sizeOutput = execFileSync(
      'git',
      ['cat-file', '-s', `${commitHash}:${filePath}`],
      { cwd: REPO }
    ).toString();
    const size = Number.parseInt(sizeOutput.trim(), 10);
    return Number.isNaN(size) ? null : size;
  } catch (err) {
    // If file was deleted or inaccessible at this commit
    return null;
  }
}

/**
 * Walks every commit, records the size of each file the commit touched, and
 * writes all rows to the CSV file in a single batch.
 *
 * Files for which no size is available (null) are left out of the output.
 *
 * @returns {Promise<void>} Settles once the CSV writer has finished.
 */
async function extractData() {
  const rows = [];
  const history = await getAllCommits();

  for (const { hash, date } of history) {
    console.log(`Processing commit ${hash}...`);

    for (const file of getFileListAtCommit(hash)) {
      const size = getFileSizeAtCommit(hash, file);
      if (size === null) {
        continue;
      }
      rows.push({ timestamp: date, file, size });
    }
  }

  await csvWriter.writeRecords(rows);
  console.log('CSV file written successfully as commit_file_sizes.csv');
}

// Run the extraction; report any failure and exit non-zero instead of leaving
// the rejected promise unhandled.
extractData().catch(err => {
  console.error('Extraction failed:', err);
  process.exitCode = 1;
});

