#!/bin/bash

# Strict mode: -u aborts on use of an unset variable, -e aborts as soon as a
# command fails, and pipefail makes a pipeline fail if any of its stages fails.
set -euo pipefail

## V.HANOKA, July 2010.
# Script that gathers various pieces of information about the data
# produced while evaluating the tagger(s):
#
#     * The plot of the scores as a function of the samples
#       for each language (data/ALL-RESULTS-GRAPH.png),
#
#     * The corresponding numerical data and their summary
#       (data/ALL-RESULTS.txt),
#
#     * The ranking of errors by decreasing frequency
#       for each given language *L* (data/ALL-ERRORS-*L*.txt),
#
#     * The confusion matrices (data/ALL-MATRIX-*L*.png) and the mapping
#       between axis indices and tags (data/ALL-MATRIX-*L*.corresp),
#
#     * A summary of all this information in a PDF file.

# Directory holding this script; the helper programs invoked further down
# are looked up relative to it.
EVAL_PATH=$(dirname "$0")

# Load the shared helper functions that live next to this script.
# NOTE(review): readMethod, used later, is presumably defined there - confirm.
source "$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/lima-pelf-functions.sh"


# Both positional arguments are mandatory. Stop right away with the usage
# line instead of failing later with an "unbound variable" error.
if (( $# < 2 )); then
    echo "usage: detailed-res.sh nb-slices language" >&2
    exit 1
fi

# Timestamp of this run; used as the title of the generated graph.
date=$(date)

# rm -f data/*

# $1: number of samples (slices) to process, $2: language code.
# Any remaining arguments stay in "$@" for the plotting section.
nbParts=$1
lang=$2
shift 2

# R is required to draw the graph and compute the summary; report its
# absence as a failure (non-zero exit status).
if ! command -v R >/dev/null 2>&1
then
  {
    echo "******* THIS SCRIPT NEEDS R (for des graphs) *********"
    echo "The package is named  \`R-base' under Mageia."
    echo "Thanks to install it before running this script again."
  } >&2
  exit 1
fi

###################
# Extract results #
###################

#######################################
# Print the precision figure reported by eval.pl for one aligned file.
# Keeps the lines containing "precision" but not "all", takes what follows
# "precision<TAB>" and drops everything from the first '%' onwards.
# Globals:   EVAL_PATH (read)
# Arguments: $1 - path of the aligned file handed to eval.pl
# Outputs:   the extracted value(s) on stdout
# Returns:   non-zero if any stage of the pipeline fails (pipefail), e.g.
#            when no matching "precision" line is found
#######################################
extractPrecision() {
    "$EVAL_PATH/eval.pl" "$1" \
        | grep "precision" \
        | grep -v "all" \
        | gawk -F "precision\t" '{print $2}' \
        | sed -e 's/%.*//g'
}

# readMethod is not defined in this file; it is expected to come from the
# sourced helper file.
method=$(readMethod "$lang")
printf 'Method is %s\n' "$method"

# rm -Rf results.$lang.$method/data
install -d "results.$lang.$method/data"
destfile="results.$lang.$method/data/ALL-RESULTS.txt"
graphfile="results.$lang.$method/data/ALL-RESULTS-GRAPH.png"

# Start from a fresh results file whose first line names the two columns
# (this header is what the R script later uses as column names).
rm -f -- "$destfile"
printf '%s\t%s\n' "${lang}.${method}.micro" "${lang}.${method}.macro" > "$destfile"

# One row per sample: micro then macro precision. The trailing tab of each
# row is kept as in the historical file format.
for sample in $(seq 1 "$nbParts"); do
    micro=$(extractPrecision "results.$lang.$method/$sample/aligned") || {
        echo "Could not extract micro precision for sample $sample" >&2
        exit 1
    }
    macro=$(extractPrecision "results.$lang.$method/$sample/aligned.macro") || {
        echo "Could not extract macro precision for sample $sample" >&2
        exit 1
    }

    # Progress on stdout (deliberately without newline), data into the file.
    printf '%s\t%s\t' "$micro" "$macro"
    printf '%s\t%s\t\n' "$micro" "$macro" >> "$destfile"
done

################
# Plot results #
################

# plot: extra R statements appended after grid(); args/cols: legend labels
# and legend colours. The micro (blue) and macro (red) series always exist.
plot=""
args="'$lang.${method}.micro', '$lang.${method}.macro'"
cols="'blue', 'red'"

# Colour palette indexed by positional-parameter number (slots 0 and 1 are
# unused placeholders).
col=( "" "" "red" "green" "darkgray" "darkorange" "blue4" "black" "cyan4" "darkgreen" )

# Add one extra series per remaining positional parameter.
# NOTE(review): the loop starts at parameter 2 although "shift 2" already
# removed nb-slices and language, so the first remaining argument is skipped;
# kept as-is because the intended indexing cannot be confirmed from here.
# NOTE(review): the results file written earlier only has the micro/macro
# columns, so an extra series has no matching column - verify before use.
for (( cpt = 2; cpt <= $#; cpt++ )); do
    series=${!cpt}
    # Fall back to black past the end of the palette instead of aborting
    # with an "unbound variable" error under nounset.
    colour=${col[cpt]:-black}
    plot="$plot;lines(data[['$series']], type='b', col='$colour')"
    args="$args, '$series'"
    cols="$cols, '$colour'"
done

# R program, assembled piece by piece: read the table, open the PNG device,
# draw both curves plus any extra series, add grid and legend, and finally
# print summary(data) so it can be captured below.
rScript="data <- read.table('$destfile', header=T);"
rScript+="png('$graphfile');"
rScript+="plot(data[['$lang.${method}.micro']], type='b', col='blue', ylim=c (0, 100), xlim=c(1, $nbParts), xlab='Sample', ylab='Precision', main='$date');"
rScript+=" lines(data[['$lang.${method}.macro']], type='b', col='red');"
rScript+=" grid(nx=3, ny=50, lty='dotted', equilogs=FALSE)$plot;"
rScript+="legend(1, 40, c($args), col=c($cols), pch=1, lty=3);"
rScript+="summary(data);"

summary=$(R --vanilla --slave <<< "$rScript")

# Append the R summary to the results file; %s keeps the R output verbatim
# (no backslash re-interpretation).
printf '\n\n\n------------------------------\n\t\t\tSUMMARY\n-------------------------------\n\n%s\n' "$summary" >> "$destfile"

printf '\nProduced files:\n   *  Evaluation scores ---> %s\n   *  Corresponding Graph    ---> %s\n' "$destfile" "$graphfile"

#######################################
# Errors Ranking and Confusion Matrix #
#######################################

# Each helper below lives next to this script and receives the language, the
# method and (where applicable) the number of samples. Paths and arguments
# are quoted so that a script directory containing spaces still works.

# Printed without a trailing newline, as in the original output layout.
echo -n "   *  Errors classification for $lang"
"$EVAL_PATH/erreursFrequentes.pl" "$lang" "$method" "$nbParts"

# Echo each command line before running it, as a simple trace.
echo "$EVAL_PATH/confusionMatrix.sh $lang $method $nbParts"
"$EVAL_PATH/confusionMatrix.sh" "$lang" "$method" "$nbParts"

echo "$EVAL_PATH/toPDF.sh synthese.$lang.$method.pdf $lang $method;"
"$EVAL_PATH/toPDF.sh" "synthese.$lang.$method.pdf" "$lang" "$method"

echo "$0 DONE"