#!/bin/bash
# ---------------------------------------------------------------------------
# Run configuration. Everything below this section only reads these values.
# ---------------------------------------------------------------------------

# Name of the run; the working directory is derived from it.
RUN_ID="ichinose_fly_profiling"
WORKING_DIR="/datanfs22/$RUN_ID"

# Labels of the two pools; they appear in the clipped-fastq file names and in
# the split output directory names.
POOL_ID_A="IP"
POOL_ID_B="Input"

# Per-pool sample sheets. Column 1 of each CSV supplies the sample ids.
# The command substitutions are intentionally left unquoted: the ids are
# obtained by shell word splitting of the first column.
SAMPLE_LIST_A="poolA.csv"
SAMPLE_LIST_B="poolB.csv"
SAMPLE_ID_A=( $(cut -d, -f1 "$SAMPLE_LIST_A") )
SAMPLE_ID_B=( $(cut -d, -f1 "$SAMPLE_LIST_B") )

# Reference data, given relative to the directory the script is started from.
DEPLETION_SOURCE="./190919_Neuro_Glia/genome/fly_ncrna"   # STAR --genomeDir, depletion step
GENOMEINDEX_DIR="./190919_Neuro_Glia/genome/dm6/"         # STAR --genomeDir, genome mapping

# Annotation handed to fp-framing (-g gene list, -b BED file).
GENE_SOURCE="./190919_Neuro_Glia/genome/genes_ensemble.txt"
BED_SOURCE="./190919_Neuro_Glia/genome/dm6_ensemble.bed"

# Same pair of files for the second fp-framing pass (outputs prefixed "Mt_").
GENE_SOURCE_MITO="./190919_Neuro_Glia/genome/chrM_dm6_mrna_genes.txt"
BED_SOURCE_MITO="./190919_Neuro_Glia/genome/chrM_dm6_mrna.bed"

# Output directories, one per pipeline stage, all below the working directory.
CLIPPED_OUT="$WORKING_DIR/clipped"
SPLIT_OUT_A="$WORKING_DIR/splitted_${POOL_ID_A}_fly"
SPLIT_OUT_B="$WORKING_DIR/splitted_${POOL_ID_B}_fly"
UNALIGNED_OUT="$WORKING_DIR/unaligned_fly"
MAPPING_OUT="$WORKING_DIR/mapped_fly"
UNIQUE_READS_OUT="$WORKING_DIR/unique_fly"
FPFRAMING_OUT="$WORKING_DIR/fpframing_fly"

# Adapter sequence passed to fastx_clipper (-a).
ADAPTER_SEQ="AGATCGGAAGAGCACACGTCTGAA"

# Patterns passed to fastx-split as -p (prefix) and -x (suffix).
PRE_SEQ="NN"
POST_SEQ="NNNNNIIIII"


echo "beginning riboscript"

#######################################
# Create the output directories that this script creates up front.
# Uses 'mkdir -p' so that the call succeeds when a directory already exists
# (re-running the script) and when ${WORKING_DIR} itself has not been created
# yet. Per-sample sub-directories are created later by the stage commands.
# Globals:
#   CLIPPED_OUT, UNALIGNED_OUT, MAPPING_OUT, UNIQUE_READS_OUT, FPFRAMING_OUT,
#   WORKING_DIR (all read)
# Arguments:
#   None
# Returns:
#   Exit status of mkdir.
#######################################
create_output_dirs() {
  mkdir -p -- \
    "${CLIPPED_OUT}" \
    "${UNALIGNED_OUT}" \
    "${MAPPING_OUT}" \
    "${UNIQUE_READS_OUT}" \
    "${FPFRAMING_OUT}" \
    "${WORKING_DIR}/shell_in_out"
}

create_output_dirs

# adapter removal

#######################################
# Print the adapter-removal commands for one pool: a backgrounded pipeline
# (zcat | fastq_illumina_filter | fastx_clipper) followed by 'wait'.
# NOTE(review): in the generated pipeline 'nohup' prefixes only zcat, the
# first stage; kept exactly as in the original command.
# Globals:
#   WORKING_DIR, ADAPTER_SEQ, CLIPPED_OUT (read)
# Arguments:
#   $1 - file name of the gzipped fastq below ${WORKING_DIR}/RawFastq
#   $2 - pool id used in the name of the clipped output file
# Outputs:
#   Two lines of shell code on stdout.
#######################################
emit_filter_commands() {
  local raw_fastq=$1
  local pool_id=$2
  printf '%s\n' "(nohup zcat ${WORKING_DIR}/RawFastq/${raw_fastq} | fastq_illumina_filter --keep N -v | fastx_clipper -Q33 -a ${ADAPTER_SEQ}  -c -v -o ${CLIPPED_OUT}/${pool_id}_clipped.fastq) &"
  printf '%s\n' "wait"
}

echo "task: adapter removal"

# The command file is rewritten ('>'), not appended to, so commands left over
# from an interrupted earlier run are never executed a second time.
{
  emit_filter_commands HTTI001A_S64_L006_R1_001.fastq.gz "${POOL_ID_A}"
  emit_filter_commands HTTI001B_S65_L006_R1_001.fastq.gz "${POOL_ID_B}"
} > filtercommands.sh

# './' makes 'source' read the file just written instead of searching PATH.
source ./filtercommands.sh


#######################################
# Print the fastx-split commands for one pool: the backgrounded fastx-split
# call on the pool's clipped fastq, followed by 'wait'.
# Globals:
#   PRE_SEQ, POST_SEQ, CLIPPED_OUT (read)
# Arguments:
#   $1 - output directory handed to fastx-split (-o)
#   $2 - sample sheet handed to fastx-split (-s)
#   $3 - pool id used in the name of the clipped input file
# Outputs:
#   Two lines of shell code on stdout.
#######################################
emit_split_commands() {
  local out_dir=$1
  local sample_sheet=$2
  local pool_id=$3
  printf '%s\n' "(nohup fastx-split -o ${out_dir} -p ${PRE_SEQ} -x ${POST_SEQ} --min-insert=14 -s ${sample_sheet} ${CLIPPED_OUT}/${pool_id}_clipped.fastq) &"
  printf '%s\n' "wait"
}

echo "task: split by index"

# Rewritten ('>') on every run so that a command file left behind by an
# interrupted earlier run cannot cause commands to be executed twice.
{
  emit_split_commands "${SPLIT_OUT_A}" "${SAMPLE_LIST_A}" "${POOL_ID_A}"
  emit_split_commands "${SPLIT_OUT_B}" "${SAMPLE_LIST_B}" "${POOL_ID_B}"
} > splitcommands.sh

# './' makes 'source' read the file just written instead of searching PATH.
source ./splitcommands.sh


# Align reads to depletion sequences

#######################################
# Print the depletion commands for the samples of one pool. For every sample:
# create ${UNALIGNED_OUT}/<id>, then run STAR against ${DEPLETION_SOURCE} on
# the sample's split fastq with unmapped reads written out
# (--outReadsUnmapped Fastx). Each STAR call is backgrounded and immediately
# followed by 'wait', so the samples are processed one after another.
# Globals:
#   UNALIGNED_OUT, DEPLETION_SOURCE (read)
# Arguments:
#   $1  - directory holding the pool's <id>.fastq files
#   $2+ - sample ids
# Outputs:
#   Four lines of shell code per sample on stdout.
#######################################
emit_depletion_commands() {
  local split_dir=$1
  shift
  local id
  for id in "$@"; do
    printf '%s\n' "(nohup mkdir ${UNALIGNED_OUT}/${id})"
    printf '%s\n' "wait"
    printf '%s\n' "(nohup STAR --genomeDir ${DEPLETION_SOURCE} --readFilesIn ${split_dir}/${id}.fastq --runThreadN 1 --outFilterMultimapNmax 2000 --outSAMtype BAM SortedByCoordinate --outFileNamePrefix ${UNALIGNED_OUT}/${id}/ --outReadsUnmapped Fastx --limitBAMsortRAM 10000000000) &"
    printf '%s\n' "wait"
  done
}

# Rewritten ('>') on every run so that a command file left behind by an
# interrupted earlier run cannot cause commands to be executed twice.
{
  emit_depletion_commands "${SPLIT_OUT_A}" "${SAMPLE_ID_A[@]}"
  emit_depletion_commands "${SPLIT_OUT_B}" "${SAMPLE_ID_B[@]}"
  printf '%s\n' "wait"
} > ncrnadepletioncommands.sh

echo "task: ncrnadepletion"
# './' makes 'source' read the file just written instead of searching PATH.
source ./ncrnadepletioncommands.sh





# Map depleted reads (the reads STAR left unmapped in the depletion step) to
# the genome.
# Note kept from the original author: --alignIntronMin 0 for plant

#######################################
# Print the genome-mapping commands. For every sample: create
# ${MAPPING_OUT}/<id>, then run STAR against ${GENOMEINDEX_DIR} on
# ${UNALIGNED_OUT}/<id>/Unmapped.out.mate1. Each STAR call is backgrounded and
# immediately followed by 'wait', so the samples are processed one after
# another.
# Globals:
#   MAPPING_OUT, GENOMEINDEX_DIR, UNALIGNED_OUT (read)
# Arguments:
#   Sample ids
# Outputs:
#   Four lines of shell code per sample on stdout.
#######################################
emit_mapping_commands() {
  local id
  for id in "$@"; do
    printf '%s\n' "(nohup mkdir ${MAPPING_OUT}/${id})"
    printf '%s\n' "wait"
    printf '%s\n' "(nohup STAR --genomeDir ${GENOMEINDEX_DIR} --readFilesIn ${UNALIGNED_OUT}/${id}/Unmapped.out.mate1 --runThreadN 1 --outSAMtype BAM SortedByCoordinate --outFileNamePrefix ${MAPPING_OUT}/${id}/ --limitBAMsortRAM 10000000000) &"
    printf '%s\n' "wait"
  done
}

# Both pools use the same command, pool A first. The file is rewritten ('>')
# so that leftovers of an interrupted earlier run are not executed again.
{
  emit_mapping_commands "${SAMPLE_ID_A[@]}" "${SAMPLE_ID_B[@]}"
  printf '%s\n' "wait"
} > starcommands.sh

echo "task: star"
# './' makes 'source' read the file just written instead of searching PATH.
source ./starcommands.sh

# Extract mapped reads

#######################################
# Print one backgrounded 'samtools view -b' command per sample that writes
# STAR's ${MAPPING_OUT}/<id>/Aligned.sortedByCoord.out.bam to the flat file
# ${MAPPING_OUT}/<id>.bam.
# Globals:
#   MAPPING_OUT (read)
# Arguments:
#   Sample ids
# Outputs:
#   One line of shell code per sample on stdout.
#######################################
emit_renaming_commands() {
  local id
  for id in "$@"; do
    printf '%s\n' "(nohup samtools view -b ${MAPPING_OUT}/${id}/Aligned.sortedByCoord.out.bam > ${MAPPING_OUT}/${id}.bam ) &"
  done
}

# All samples are started in the background and awaited together by the
# single trailing 'wait'. The file is rewritten ('>') so that leftovers of an
# interrupted earlier run are not executed again.
{
  emit_renaming_commands "${SAMPLE_ID_A[@]}" "${SAMPLE_ID_B[@]}"
  printf '%s\n' "wait"
} > renamingcommands.sh

echo "task: renaming bam file"
# './' makes 'source' read the file just written instead of searching PATH.
source ./renamingcommands.sh


# Index bam files

#######################################
# Print one backgrounded 'samtools index' command per sample for
# ${MAPPING_OUT}/<id>.bam.
# Globals:
#   MAPPING_OUT (read)
# Arguments:
#   Sample ids
# Outputs:
#   One line of shell code per sample on stdout.
#######################################
emit_indexing_commands() {
  local id
  for id in "$@"; do
    printf '%s\n' "(nohup samtools index ${MAPPING_OUT}/${id}.bam ) &"
  done
}

# All samples are started in the background and awaited together by the
# single trailing 'wait'. The file is rewritten ('>') so that leftovers of an
# interrupted earlier run are not executed again.
{
  emit_indexing_commands "${SAMPLE_ID_A[@]}" "${SAMPLE_ID_B[@]}"
  printf '%s\n' "wait"
} > indexingcommands.sh

echo "task: indexing bam file"
# './' makes 'source' read the file just written instead of searching PATH.
source ./indexingcommands.sh


# Suppress duplicated reads (genome-mapped reads and reads mapped to ncrna)

#######################################
# Print the bam-suppress-duplicates commands, all backgrounded:
#   1. for every sample, the genome BAM ${MAPPING_OUT}/<id>.bam
#      -> ${UNIQUE_READS_OUT}/<id>_unique.bam
#   2. for every sample, the depletion-step BAM
#      ${UNALIGNED_OUT}/<id>/Aligned.sortedByCoord.out.bam
#      -> ${UNIQUE_READS_OUT}/<id>_ncrna_unique.bam
# Each command also writes a matching *_duplicate_stats.txt file (--stats).
# Globals:
#   MAPPING_OUT, UNALIGNED_OUT, UNIQUE_READS_OUT (read)
# Arguments:
#   Sample ids
# Outputs:
#   Two lines of shell code per sample on stdout (all genome commands first,
#   then all ncrna commands).
#######################################
emit_dedup_commands() {
  local id
  for id in "$@"; do
    printf '%s\n' "(nohup bam-suppress-duplicates -i ${MAPPING_OUT}/${id}.bam -o ${UNIQUE_READS_OUT}/${id}_unique.bam --annotate --stats=${UNIQUE_READS_OUT}/${id}_duplicate_stats.txt ) &"
  done
  for id in "$@"; do
    printf '%s\n' "(nohup bam-suppress-duplicates -i ${UNALIGNED_OUT}/${id}/Aligned.sortedByCoord.out.bam -o ${UNIQUE_READS_OUT}/${id}_ncrna_unique.bam --annotate --stats=${UNIQUE_READS_OUT}/${id}_ncrna_duplicate_stats.txt ) &"
  done
}

# All commands are started in the background and awaited together by the
# single trailing 'wait'. The file is rewritten ('>') so that leftovers of an
# interrupted earlier run are not executed again.
{
  emit_dedup_commands "${SAMPLE_ID_A[@]}" "${SAMPLE_ID_B[@]}"
  printf '%s\n' "wait"
} > duplicatescommands.sh

echo "task: removing duplicates from bam file"
# './' makes 'source' read the file just written instead of searching PATH.
source ./duplicatescommands.sh


# Index the duplicate-suppressed bam files

#######################################
# Print the backgrounded 'samtools index' commands for the files written by
# the duplicate-suppression step: first ${UNIQUE_READS_OUT}/<id>_unique.bam
# for every sample, then ${UNIQUE_READS_OUT}/<id>_ncrna_unique.bam for every
# sample.
# Globals:
#   UNIQUE_READS_OUT (read)
# Arguments:
#   Sample ids
# Outputs:
#   Two lines of shell code per sample on stdout.
#######################################
emit_second_indexing_commands() {
  local id
  for id in "$@"; do
    printf '%s\n' "(nohup samtools index ${UNIQUE_READS_OUT}/${id}_unique.bam) &"
  done
  for id in "$@"; do
    printf '%s\n' "(nohup samtools index ${UNIQUE_READS_OUT}/${id}_ncrna_unique.bam) &"
  done
}

# All commands are started in the background and awaited together by the
# single trailing 'wait'. The file is rewritten ('>') so that leftovers of an
# interrupted earlier run are not executed again.
{
  emit_second_indexing_commands "${SAMPLE_ID_A[@]}" "${SAMPLE_ID_B[@]}"
  printf '%s\n' "wait"
} > secondindexingcommands.sh

echo "task: re-indexing bam file"
# './' makes 'source' read the file just written instead of searching PATH.
source ./secondindexingcommands.sh



# Run fp-framing on the duplicate-suppressed genome bam files

#######################################
# Print the backgrounded fp-framing commands. Every sample's
# ${UNIQUE_READS_OUT}/<id>_unique.bam is processed twice:
#   1. with GENE_SOURCE / BED_SOURCE, output ${FPFRAMING_OUT}/<id>
#   2. with GENE_SOURCE_MITO / BED_SOURCE_MITO, output ${FPFRAMING_OUT}/Mt_<id>
# Globals:
#   GENE_SOURCE, BED_SOURCE, GENE_SOURCE_MITO, BED_SOURCE_MITO,
#   FPFRAMING_OUT, UNIQUE_READS_OUT (read)
# Arguments:
#   Sample ids
# Outputs:
#   Two lines of shell code per sample on stdout (all commands of pass 1
#   first, then all commands of pass 2).
#######################################
emit_fpframing_commands() {
  local id
  for id in "$@"; do
    printf '%s\n' "(nohup fp-framing -l 14,60 --flanking=-300,300 -g ${GENE_SOURCE} -o ${FPFRAMING_OUT}/${id} -b ${BED_SOURCE} ${UNIQUE_READS_OUT}/${id}_unique.bam) &"
  done
  for id in "$@"; do
    printf '%s\n' "(nohup fp-framing -l 14,60 --flanking=-300,300 -g ${GENE_SOURCE_MITO} -o ${FPFRAMING_OUT}/Mt_${id} -b ${BED_SOURCE_MITO} ${UNIQUE_READS_OUT}/${id}_unique.bam) &"
  done
}

# All commands are started in the background and awaited together by the
# single trailing 'wait'. The file is rewritten ('>') so that leftovers of an
# interrupted earlier run are not executed again.
{
  emit_fpframing_commands "${SAMPLE_ID_A[@]}" "${SAMPLE_ID_B[@]}"
  printf '%s\n' "wait"
} > fpframingcommands.sh

echo "task: fp-framing"
# './' makes 'source' read the file just written instead of searching PATH.
source ./fpframingcommands.sh




#######################################
# Archive the record of this run in ${WORKING_DIR}/shell_in_out:
#   - concatenate the per-stage command files, in the order the stages ran,
#     onto list_of_commands_${RUN_ID} and move that log into the archive
#   - delete the per-stage command files
#   - copy riboscript.sh and move nohup.out into the archive
# The depletion and STAR stages write ncrnadepletioncommands.sh and
# starcommands.sh; those are the names collected here (the previous
# bowtiecommands.sh / tophatcommands.sh are never created by this script, so
# both stages were missing from the log).
# Only the listed stage files are deleted, so unrelated files that happen to
# match *commands.sh in the current directory are left alone.
# NOTE(review): the script copy assumes this file is named riboscript.sh and
# sits in the current directory - confirm for renamed copies.
# Globals:
#   RUN_ID, WORKING_DIR (read)
# Arguments:
#   None
#######################################
archive_run_artifacts() {
  local -r command_log="list_of_commands_${RUN_ID}"
  local -r archive_dir="${WORKING_DIR}/shell_in_out"
  local -a stage_files=(
    filtercommands.sh
    splitcommands.sh
    ncrnadepletioncommands.sh
    starcommands.sh
    renamingcommands.sh
    indexingcommands.sh
    duplicatescommands.sh
    secondindexingcommands.sh
    fpframingcommands.sh
  )
  local stage_file

  # Make sure the log exists (without truncating it) even if no stage file does.
  : >> "${command_log}"
  for stage_file in "${stage_files[@]}"; do
    if [[ -f "${stage_file}" ]]; then
      cat -- "${stage_file}" >> "${command_log}"
    else
      echo "warning: ${stage_file} not found; its commands are missing from ${command_log}" >&2
    fi
  done

  rm -f -- "${stage_files[@]}"
  mv -- "${command_log}" "${archive_dir}"
  cp -- riboscript.sh "${archive_dir}"
  mv -- nohup.out "${archive_dir}"
}

archive_run_artifacts

echo "exiting riboscript"

