#!/bin/bash
RUN_ID=SIMM066_AgFungi_RNA_Trizol
WORKING_DIR="/home/shichino/data/${RUN_ID}"

SAMPLE_LIST="pool_rna.csv"
SAMPLE_ID=( $(cut -d ',' -f1 ${SAMPLE_LIST} ) )

GENOMEINDEX_SOURCE="/home/genome/Colletotrichum_orbiculare/GCA_000350065.2/C.orbiculare_knownGene_STAR"
GENOMEINDEX_SOURCE_MITO="/home/genome/Colletotrichum_orbiculare/Mitochondria_Genome/C.orbiculare_knownGeneMITO_STAR"
DEPLETION_SOURCE="/home/genome/Colletotrichum_orbiculare/GCA_000350065.2/C.orbiculare_rtRNA_STAR"
BED_SOURCE="/home/genome/Colletotrichum_orbiculare/5UTR_annotated/Cob_genes_with_UTRs.sort.1.bed"
BED_SOURCE_MITO="/home/genome/Colletotrichum_orbiculare/Mitochondria_Genome/Cob_mitochondrial.bed"

RAWFASTQ_SOURCE="${WORKING_DIR}/RawFastq/"

CLIPPED_OUT="${WORKING_DIR}/clipped"
UNALIGNED_OUT="${WORKING_DIR}/unaligned"
MAPPING_OUT="${WORKING_DIR}/mapped"
FPCOUNT_OUT="${WORKING_DIR}/fpcount"
SHELL_IN_OUT="${WORKING_DIR}/shell_in_out"

ADAPTER_SEQ=AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC

echo "beginning riboscript"

mkdir ${CLIPPED_OUT} ${UNALIGNED_OUT} ${MAPPING_OUT} ${FPCOUNT_OUT} ${SHELL_IN_OUT}


# adapter removal
echo "task: adapter removal"
date +"%Y/%m/%d %H:%M:%S"
for id in ${SAMPLE_ID[@]}
do
	echo "(nohup fastp -i ${RAWFASTQ_SOURCE}/${id}_1*.fastq.gz -I ${RAWFASTQ_SOURCE}/${id}_2*.fastq.gz -o ${CLIPPED_OUT}/${id}_R1_clipped.fastq -O ${CLIPPED_OUT}/${id}_R2_clipped.fastq -a ${ADAPTER_SEQ} -w 12 -c -j ${CLIPPED_OUT}/${id}.json -h ${CLIPPED_OUT}/${id}.html) &" >> filtercommands.sh
	echo "wait">> filtercommands.sh
done
source filtercommands.sh

# ncRNA depletion
echo "task: ncRNA depletion"
date +"%Y/%m/%d %H:%M:%S"

for id in ${SAMPLE_ID[@]}
do
		echo "(nohup mkdir ${UNALIGNED_OUT}/${id})" >> ncrnadepletioncommands.sh
        echo "(nohup STAR --genomeDir ${DEPLETION_SOURCE} --readFilesIn ${CLIPPED_OUT}/${id}_R1_clipped.fastq ${CLIPPED_OUT}/${id}_R2_clipped.fastq --runThreadN 12 --outFilterMultimapNmax 2000 --outSAMtype BAM SortedByCoordinate --outFileNamePrefix ${UNALIGNED_OUT}/${id}/ --alignIntronMax 1 --outReadsUnmapped Fastx --limitBAMsortRAM 10000000000) &" >> ncrnadepletioncommands.sh
		echo "wait">> ncrnadepletioncommands.sh
done
source ncrnadepletioncommands.sh


# Map rtRNA-depleted reads to fungus genome
echo "task: mapping"
date +"%Y/%m/%d %H:%M:%S"

for id in ${SAMPLE_ID[@]}
do
		echo "(nohup mkdir ${MAPPING_OUT}/${id})" >> starcommands.sh
        echo "(nohup STAR --genomeDir ${GENOMEINDEX_SOURCE} --readFilesIn ${UNALIGNED_OUT}/${id}/Unmapped.out.mate1 ${UNALIGNED_OUT}/${id}/Unmapped.out.mate2 --runThreadN 12 --outSAMtype BAM SortedByCoordinate --outFileNamePrefix ${MAPPING_OUT}/${id}/ --outReadsUnmapped Fastx) &" >> starcommands.sh
		echo "wait">> starcommands.sh

		echo "(nohup mkdir ${MAPPING_OUT}/mito.${id})" >> starcommands.sh
        echo "(nohup STAR --genomeDir ${GENOMEINDEX_SOURCE_MITO} --readFilesIn ${UNALIGNED_OUT}/${id}/Unmapped.out.mate1 --runThreadN 6 --outSAMtype BAM SortedByCoordinate --outFileNamePrefix ${MAPPING_OUT}/mito.${id}/ --limitBAMsortRAM 10000000000) &" >> starcommands.sh
		echo "wait">> starcommands.sh

done
source starcommands.sh




#Extract mapped reads
echo "task: extract mapped reads"
date +"%Y/%m/%d %H:%M:%S"

for id in ${SAMPLE_ID[@]}
do
        echo "(nohup samtools view -b ${MAPPING_OUT}/${id}/Aligned.sortedByCoord.out.bam > ${MAPPING_OUT}/${id}.bam ) &" >> renamingcommands.sh
        echo "(nohup samtools view -b ${MAPPING_OUT}/mito.${id}/Aligned.sortedByCoord.out.bam > ${MAPPING_OUT}/mito.${id}.bam ) &" >> renamingcommands.sh
done
echo "wait">> renamingcommands.sh
source renamingcommands.sh


#Index bam files
echo "task: indexing bam file"
date +"%Y/%m/%d %H:%M:%S"

for id in ${SAMPLE_ID[@]}
do
	echo "(nohup samtools index ${MAPPING_OUT}/${id}.bam ) &" >> indexingcommands.sh
	echo "(nohup samtools index ${MAPPING_OUT}/mito.${id}.bam ) &" >> indexingcommands.sh
done
echo "wait">>indexingcommands.sh
source indexingcommands.sh


#fp-count
echo "task: fp-count"
date +"%Y/%m/%d %H:%M:%S"

for id in ${SAMPLE_ID[@]}
do
       echo "(nohup fp-count -r -c5,5 -o ${FPCOUNT_OUT}/${id}.txt -b ${BED_SOURCE} -a mrna.txt ${MAPPING_OUT}/${id}.bam) &" >> fpcountcommands.sh
       echo "(nohup fp-count -r -c5,5 -o ${FPCOUNT_OUT}/mito.${id}.txt -b ${BED_SOURCE_MITO} -a mrna.txt ${MAPPING_OUT}/mito.${id}.bam) &" >> fpcountcommands.sh
done
echo "wait" >> fpcountcommands.sh
source fpcountcommands.sh


cat filtercommands.sh >> list_of_commands_${RUN_ID}
cat ncrnadepletioncommands.sh >> list_of_commands_${RUN_ID}
cat starcommands.sh >> list_of_commands_${RUN_ID}
cat renamingcommands.sh >> list_of_commands_${RUN_ID}
cat indexingcommands.sh >> list_of_commands_${RUN_ID}
cat fpcountcommands.sh >>  list_of_commands_${RUN_ID}


rm *commands.sh
mv list_of_commands_${RUN_ID} ${SHELL_IN_OUT}
cp rnascript.sh ${SHELL_IN_OUT}


echo "exiting riboscript"
date +"%Y/%m/%d %H:%M:%S"

mv nohup.out ${SHELL_IN_OUT}
