#!/bin/bash

# SPDX-FileCopyrightText: 2020 Stefano Zacchiroli <zack@upsilon.cc>
# SPDX-License-Identifier: GPL-3.0-or-later

# Depends: pv, zstd

# Clean up an AUTHORS dataset: decompress it, filter it through
# ./smudge_authors.py (resolved relative to the current directory), and
# recompress the result next to the input.
#
# Usage: cleanup.sh AUTHORS.csv.zst
#
# For an input named FOO.csv.zst this writes:
#   FOO--clean.csv.zst   the smudger's stdout, zstd-compressed
#   FOO--clean.csv.log   the smudger's stderr
#
# Exit status:
#   2  missing argument, or argument not ending in .csv.zst
#   1  input not readable, or destination file already exists
#   otherwise the status of the processing pipeline (0 on success)
#
# NOTE: a disabled two-dataset mode used to live here as commented-out code
# (first argument filtered via ./smudge_commits.py, second argument via
# ./smudge_authors.py). Only the authors dataset is handled now.

# Make the pipeline fail if ANY stage fails, not just the final compressor;
# otherwise a crashing smudger would still yield exit status 0.
set -o pipefail

if [[ -z "${1:-}" ]]; then
    echo "Usage: cleanup.sh AUTHORS.csv.zst" >&2
    exit 2
fi

dataset="$1"

# Without the expected suffix the derived log/output names would be identical
# to the input path itself, so refuse such names up front.
if [[ "$dataset" != *.csv.zst ]]; then
    echo "input file ${dataset} does not end in .csv.zst, abort." >&2
    exit 2
fi

# Check before any redirection creates the log/output files.
if [[ ! -r "$dataset" ]]; then
    echo "input file ${dataset} is not readable, abort." >&2
    exit 1
fi

# Strip the suffix at the END of the name only (a plain ${var/pat/repl}
# would rewrite the first match anywhere in the path).
stem=${dataset%.csv.zst}
logfile="${stem}--clean.csv.log"
cleanfile="${stem}--clean.csv.zst"

# Never overwrite a previous result.
if [[ -e "$cleanfile" ]]; then
    echo "destination file ${cleanfile} already exists, abort." >&2
    exit 1
fi

smudger="./smudge_authors.py"

# pv shows progress while feeding the compressed input; the smudger's stderr
# is captured in the log file, its stdout is recompressed with zstdmt.
pv "$dataset" \
    | unzstd \
    | "$smudger" 2> "$logfile" \
    | zstdmt > "$cleanfile"
status=$?

if (( status != 0 )); then
    # A partial output would look like a valid result and would also block
    # a re-run via the "already exists" check above, so remove it. The log
    # file is kept for diagnosis.
    echo "cleanup of ${dataset} failed (exit status ${status}), see ${logfile}; removing partial ${cleanfile}." >&2
    rm -f -- "$cleanfile"
    exit "$status"
fi
