#!/bin/bash
### PASTE AND RUN THIS IN THE /ichinose/ DIRECTORY
# Identifier of this run; it also names the run's working directory.
RUN_ID=210825_transcriptome_Nsyb_Repo
WORKING_DIR="./${RUN_ID}"

### Save pool.csv files in the /ichinose/ directory ###
# Path of the comma-separated sample sheet. Its first column supplies the
# sample IDs. This is a placeholder: fill it in before running.
SAMPLE_LIST=
# Stop here while the placeholder is still empty. Without a file operand `cut`
# would read standard input and the pipeline would stall.
: "${SAMPLE_LIST:?set SAMPLE_LIST to the sample sheet (pool.csv) before running}"
# One array element per sample ID. Word splitting of the command substitution
# is intentional. Carriage returns are removed so a sheet saved with CRLF line
# endings does not leave a stray \r at the end of an ID.
# shellcheck disable=SC2207
SAMPLE_ID=( $(cut -d ',' -f1 -- "${SAMPLE_LIST}" | tr -d '\r') )


# --- Reference data -------------------------------------------------------
# STAR index directories: one for the depletion alignment, one for the genome.
DEPLETION_SOURCE='./190919_Neuro_Glia/genome/fly_ncrna'
GENOMEINDEX_DIR='./190919_Neuro_Glia/genome/dm6/'

# Gene list and BED annotation handed to fp-framing (-g and -b).
GENE_SOURCE='./190919_Neuro_Glia/genome/genes_ensemble.txt'
BED_SOURCE='./190919_Neuro_Glia/genome/dm6_ensemble.bed'

# chrM counterparts of the two files above; not referenced by the steps below.
GENE_SOURCE_MITO='./190919_Neuro_Glia/genome/chrM_dm6_mrna_genes.txt'
BED_SOURCE_MITO='./190919_Neuro_Glia/genome/chrM_dm6_mrna.bed'

# --- Per-stage output directories (all inside the run's working directory) --
CLIPPED_OUT="$WORKING_DIR/clipped_RNAseq"
UNALIGNED_OUT="$WORKING_DIR/unaligned_RNAseq"
MAPPING_OUT="$WORKING_DIR/mapped_RNAseq"
FPFRAMING_OUT="$WORKING_DIR/fpframing_RNAseq"

# Adapter sequence passed to fastp with -a.
ADAPTER_SEQ='AAGTCGGAGGCCAAGCGGTCTTAGGAAGACAA'

# Create the per-stage output directories and the folder that later receives
# the command log. -p lets a re-run pass over directories that already exist
# and creates any missing parent directory.
mkdir -p -- "${CLIPPED_OUT}" "${UNALIGNED_OUT}" "${MAPPING_OUT}" "${FPFRAMING_OUT}" "${WORKING_DIR}/shell_in_out"

echo "beginning riboscript"


# Adapter removal
#
# Writes one fastp command per sample into filtercommands.sh and then sources
# that file. Each command is followed by its own `wait`, so the samples are
# processed one after another.
# The redirection on the loop truncates filtercommands.sh before writing, so
# commands left behind by an interrupted earlier run are not executed again.
for id in "${SAMPLE_ID[@]}"; do
	echo "(nohup fastp -i ${WORKING_DIR}/00_Rawdata/${id}_*.fastq.gz -o ${CLIPPED_OUT}/${id}_clipped.fastq -a ${ADAPTER_SEQ} -w 12 -h ${CLIPPED_OUT}/${id}.html) &"
	echo "wait"
done > filtercommands.sh
source filtercommands.sh


# Align reads to depletion sequences
#
# For every sample: create its output folder under ${UNALIGNED_OUT}, then run
# STAR on the clipped reads against the index in ${DEPLETION_SOURCE}.
# --outReadsUnmapped Fastx makes STAR write the reads it could not align;
# the genome-mapping step reads them back as Unmapped.out.mate1.
# Each STAR command is followed by `wait`, so samples run one at a time.
# The redirection on the loop truncates the command file first, so commands
# left over from an interrupted earlier run are not executed again.
for id in "${SAMPLE_ID[@]}"; do
	echo "(nohup mkdir ${UNALIGNED_OUT}/${id})"
	echo "wait"
	echo "(nohup STAR --genomeDir ${DEPLETION_SOURCE} --readFilesIn ${CLIPPED_OUT}/${id}_clipped.fastq --runThreadN 12 --outFilterMultimapNmax 2000 --outSAMtype BAM SortedByCoordinate --outFileNamePrefix ${UNALIGNED_OUT}/${id}/ --outReadsUnmapped Fastx --limitBAMsortRAM 10000000000) &"
	echo "wait"
done > ncrnadepletioncommands.sh

echo "task: ncrnadepletion"
echo "wait">> ncrnadepletioncommands.sh
source ncrnadepletioncommands.sh


# Map the reads left unaligned by the depletion step to the genome
# (note kept from the original: --alignIntronMin 0 for plant)
#
# For every sample: create its output folder under ${MAPPING_OUT}, then run
# STAR on ${UNALIGNED_OUT}/<id>/Unmapped.out.mate1 against the index in
# ${GENOMEINDEX_DIR}. Each STAR command is followed by `wait`, so samples run
# one at a time.
# The redirection on the loop truncates the command file first, so commands
# left over from an interrupted earlier run are not executed again.
for id in "${SAMPLE_ID[@]}"; do
	echo "(nohup mkdir ${MAPPING_OUT}/${id})"
	echo "wait"
	echo "(nohup STAR --genomeDir ${GENOMEINDEX_DIR} --readFilesIn ${UNALIGNED_OUT}/${id}/Unmapped.out.mate1 --runThreadN 12 --outSAMtype BAM SortedByCoordinate --outFileNamePrefix ${MAPPING_OUT}/${id}/ --limitBAMsortRAM 10000000000) &"
	echo "wait"
done > starcommands.sh

echo "task: star"
echo "wait">> starcommands.sh
source starcommands.sh

# Write each sample's STAR alignment to ${MAPPING_OUT}/<id>.bam
#
# `samtools view -b` is given no filter flags here, so it re-writes
# Aligned.sortedByCoord.out.bam as BAM under the per-sample name.
# All samples are started in the background and a single trailing `wait`
# blocks until every one of them has finished.
# The redirection on the loop truncates the command file first, so commands
# left over from an interrupted earlier run are not executed again.
for id in "${SAMPLE_ID[@]}"; do
	echo "(nohup samtools view -b ${MAPPING_OUT}/${id}/Aligned.sortedByCoord.out.bam > ${MAPPING_OUT}/${id}.bam ) &"
done > renamingcommands.sh

echo "task: renaming bam file"
echo "wait">> renamingcommands.sh
source renamingcommands.sh

# Index bam files
#
# Runs `samtools index` on every ${MAPPING_OUT}/<id>.bam. All samples are
# started in the background and a single trailing `wait` blocks until every
# one of them has finished.
# The redirection on the loop truncates the command file first, so commands
# left over from an interrupted earlier run are not executed again.
for id in "${SAMPLE_ID[@]}"; do
	echo "(nohup samtools index ${MAPPING_OUT}/${id}.bam ) &"
done > indexingcommands.sh

echo "task: indexing bam file"
echo "wait">>indexingcommands.sh
source indexingcommands.sh

# Run fp-framing on every sample's BAM
#
# Uses the gene list ${GENE_SOURCE} (-g) and the annotation ${BED_SOURCE} (-b)
# and writes its output under the prefix ${FPFRAMING_OUT}/<id>. All samples are
# started in the background and a single trailing `wait` blocks until every
# one of them has finished.
# The redirection on the loop truncates the command file first, so commands
# left over from an interrupted earlier run are not executed again.
for id in "${SAMPLE_ID[@]}"; do
	echo "(nohup fp-framing -l 14,100 --flanking=-300,300 -g ${GENE_SOURCE} -o ${FPFRAMING_OUT}/${id} -b ${BED_SOURCE} ${MAPPING_OUT}/${id}.bam) &"
done > fpframingcommands.sh


echo "task: fp-framing"
echo "wait">>fpframingcommands.sh
source fpframingcommands.sh

# Archive what was run.
#
# The command files this script generates, in pipeline order. The previous
# list named files that are never created here (split, bowtie, tophat,
# duplicates, secondindexing) and left out the depletion and STAR mapping
# commands, so the saved log was incomplete.
command_files=(
	filtercommands.sh
	ncrnadepletioncommands.sh
	starcommands.sh
	renamingcommands.sh
	indexingcommands.sh
	fpframingcommands.sh
)

# Concatenate them into a single log named after the run.
cat -- "${command_files[@]}" >> "list_of_commands_${RUN_ID}"

# Remove only the files generated above instead of every *commands.sh in the
# current directory.
rm -f -- "${command_files[@]}"

# The trailing slash makes mv/cp fail when shell_in_out is missing, rather
# than renaming the source file to "shell_in_out".
mv -- "list_of_commands_${RUN_ID}" "${WORKING_DIR}/shell_in_out/"
cp -- riboscript.sh "${WORKING_DIR}/shell_in_out/"
mv -- nohup.out "${WORKING_DIR}/shell_in_out/"

echo "exiting riboscript"
