#!/bin/bash
#
# Gathers read-count statistics for one run into ${WORKING_DIR}/stats.
# Reads the sample sheet pool.csv from the current working directory.

RUN_ID=#######
WORKING_DIR="#######/${RUN_ID}"

# Sample sheet; only its first comma-separated column is used here.
SAMPLE_LIST="pool.csv"

# SAMPLE_ID: one array element per non-blank value in the first CSV column.
# Reading line by line (instead of an unquoted $(...) expansion) keeps the
# shell from word-splitting or glob-expanding the IDs. `read` with the default
# IFS still trims leading/trailing whitespace, and blank lines are skipped.
SAMPLE_ID=()
while read -r sample_id; do
  [[ -n "${sample_id}" ]] && SAMPLE_ID+=("${sample_id}")
done < <(cut -d ',' -f1 -- "${SAMPLE_LIST}")

# Output locations for the statistics written by this script.
STATS_OUT="${WORKING_DIR}/stats"
SPLITTEDREADS_OUT="${STATS_OUT}/splitted_reads"
MAPPEDREADS_OUT="${STATS_OUT}/mapped_reads"
FINALREADS_OUT="${STATS_OUT}/final_reads"

# Directories under the run's working directory (not all are used below).
CLIPPED_OUT="${WORKING_DIR}/clipped"
SPLIT_OUT="${WORKING_DIR}/splitted"
UNALIGNED_OUT="${WORKING_DIR}/unaligned"
MAPPING_OUT="${WORKING_DIR}/mapped"
UNIQUE_READS_OUT="${WORKING_DIR}/unique"
FPFRAMING_OUT="${WORKING_DIR}/fpframing"
SHELL_IN_OUT="${WORKING_DIR}/shell_in_out"


echo "beginning statsscript"

# Create the statistics output directories.
# -p creates missing parents and does not fail when a directory already
# exists, so the script can be re-run for the same run without mkdir errors.
mkdir -p -- "${STATS_OUT}" "${SPLITTEDREADS_OUT}" "${MAPPEDREADS_OUT}" "${FINALREADS_OUT}"

echo "rawreads"
date +"%Y/%m/%d %H:%M:%S"


# Raw read count.
# The number written to rawreads.txt is the LINE count of the decompressed
# FASTQ. A FASTQ record is 4 lines, so the read count is this number divided
# by 4; that division is not done here.
# Runs in the foreground: the original backgrounded this pipeline and waited
# for it immediately, which is equivalent.
zcat "${WORKING_DIR}/RawFastq/#######.fastq.gz" | wc -l > "${STATS_OUT}/rawreads.txt"

echo "clippedreads"
date +"%Y/%m/%d %H:%M:%S"


# Filtering and linker-sequence elimination (clipped reads).
# clippedreads.txt receives the plain `wc -l` output for the clipped FASTQ
# (line count followed by the file name). A FASTQ record is 4 lines, so the
# read count is that number divided by 4; the division is not done here.
# Started in the background; the `wait` further down collects it.
wc -l "${CLIPPED_OUT}/${RUN_ID}_clipped.fastq" > "${STATS_OUT}/clippedreads.txt" &


echo "splitting, mapping"
date +"%Y/%m/%d %H:%M:%S"

# Splitting ratio
# fates.txt is read from the top level of ${SPLIT_OUT}. No per-sample id is
# set at this point in the script, so the path must not reference one.
# NOTE(review): if per-sample fates.txt files were intended, this copy belongs
# inside a loop over the sample IDs with a per-sample destination name.
cp "${SPLIT_OUT}/fates.txt" "${SPLITTEDREADS_OUT}/fates.txt"

# Block until the background clipped-read count has finished.
wait


# ncRNA depletion input and output:
# copy each sample's Log.final.out from the mapping directory to
# <id>_mappedreads.txt in the mapped-reads statistics directory.
# The array and paths are quoted so sample IDs are used verbatim.
for id in "${SAMPLE_ID[@]}"; do
  cp "${MAPPING_OUT}/${id}/Log.final.out" "${MAPPEDREADS_OUT}/${id}_mappedreads.txt"
done

echo "finalreads"
date +"%Y/%m/%d %H:%M:%S"


# Count final reads after duplicate removal.
# For every sample: take the records of <id>_unique.bam that samtools emits
# with -F 0x40 (records having flag bit 0x40 set are filtered out), keep
# column 1 (the read name) and count the distinct names.
# `sort -u` replaces `sort | uniq`; -T . keeps sort's temporary files in the
# current directory.
# One pipeline per sample is started in the background; the `wait` after the
# loop blocks until all of them have finished.
for id in "${SAMPLE_ID[@]}"; do
  samtools view -F 0x40 "${UNIQUE_READS_OUT}/${id}_unique.bam" \
    | cut -f1 \
    | sort -T . -u \
    | wc -l > "${FINALREADS_OUT}/${id}_finalreads.txt" &
done

wait

echo "exiting statsscript"

# Archive the nohup log and statsscript.sh (both taken from the current
# directory) into the run's shell_in_out directory.
# The directory is created first and the cp destination ends with a slash:
# without that, a missing directory would make cp create a regular FILE named
# shell_in_out instead of copying into it.
mkdir -p -- "${SHELL_IN_OUT}"
mv -- nohup.out "${SHELL_IN_OUT}/nohup.out.stats_ribo"
cp -- statsscript.sh "${SHELL_IN_OUT}/"


date +"%Y/%m/%d %H:%M:%S"