#!/bin/bash
# Trace every command. Tracing is enabled with "set" rather than on the shebang
# line because shebang options are ignored when the script is launched as
# "bash script.sh".
set -x
# Stop at the first failing command, unset variable or failed pipeline stage so
# that later stages never run on missing or truncated intermediate files.
set -euo pipefail

# Trim the constant flanking sequences from the iRFP barcode reads with Cutadapt.
# Stage 1 reads <sample>.fastq.gz and writes <sample>_Cut1.fastq; stage 2 reads
# that file and writes <sample>_Cut2.fastq. In both stages the trimmed reads are
# taken from Cutadapt's standard output.

irfp_trim_samples=(
  Rep1_0h_iRFP Rep1_2h_iRFP Rep1_5h_iRFP
  Rep2_0h_iRFP Rep2_2h_iRFP Rep2_5h_iRFP
  Rep3_0h_iRFP Rep3_2h_iRFP Rep3_5h_iRFP
)

# Stage 1: cut the constant region on the 3' end (-a) with a minimum read
# length of 68 (-m).
for sample in "${irfp_trim_samples[@]}"; do
  cutadapt -j 30 -a ACGAATTTCGACCTTTACGTGCTAACA -m 68 "./${sample}.fastq.gz" \
    > "./${sample}_Cut1.fastq"
done

# Stage 2: cut the constant region on the 5' end (-g) with a minimum read
# length of 50 (-m).
for sample in "${irfp_trim_samples[@]}"; do
  cutadapt -j 30 -g GCTGTCGAGATCCAGTCCG -m 50 "./${sample}_Cut1.fastq" \
    > "./${sample}_Cut2.fastq"
done

# The reads should now consist of UMI-constant-barcode only.

# Trim the constant flanking sequences from the mCherry barcode reads with
# Cutadapt. Stage 1 reads <sample>.fastq.gz and writes <sample>Cut1.fastq;
# stage 2 reads that file and writes <sample>Cut2.fastq.
#
# NOTE(review): unlike the iRFP intermediates (<sample>_Cut1.fastq), these file
# names have no underscore before "Cut". The mCherry UMI-extraction step reads
# <sample>Cut2.fastq, so the names must be changed in both places or not at all.

mcherry_trim_samples=(
  Rep1_0h_mCherry Rep1_2h_mCherry Rep1_5h_mCherry
  Rep2_0h_mCherry Rep2_2h_mCherry Rep2_5h_mCherry
  Rep3_0h_mCherry Rep3_2h_mCherry Rep3_5h_mCherry
)

# Stage 1: cut the constant region on the 3' end (-a) with a minimum read
# length of 68 (-m).
for sample in "${mcherry_trim_samples[@]}"; do
  cutadapt -j 30 -a GAGGCAATGCAAAGTGACTCGACTGGT -m 68 "./${sample}.fastq.gz" \
    > "./${sample}Cut1.fastq"
done

# Stage 2: cut the constant region on the 5' end (-g) with a minimum read
# length of 50 (-m).
for sample in "${mcherry_trim_samples[@]}"; do
  cutadapt -j 30 -g GCAGAAGTGGTCCTGCAAC -m 50 "./${sample}Cut1.fastq" \
    > "./${sample}Cut2.fastq"
done

# The reads should now consist of UMI-constant-barcode only.

# Use Cutadapt on fastq files whose reads have the following layout:
#   UMI-ACT_INTERNAL-BARCODE
# Cutadapt cuts 8 nt off the beginning of each read (--cut 8) and moves them
# into the read header as "umi=..." (--rename). After cutting, it trims the
# ACT_INTERNAL adapter from the 5' end of the read (-g).
# Trimmed reads are written to <sample>_Cut3.fastq (-o); whatever Cutadapt
# prints on standard output is saved to <sample>_Cut3-log.txt.

# iRFP

ACT_INTERNAL_IRFP="TCCAGTCACTCTCCTGCA"

irfp_umi_samples=(
  Rep1_0h_iRFP Rep1_2h_iRFP Rep1_5h_iRFP
  Rep2_0h_iRFP Rep2_2h_iRFP Rep2_5h_iRFP
  Rep3_0h_iRFP Rep3_2h_iRFP Rep3_5h_iRFP
)

for sample in "${irfp_umi_samples[@]}"; do
  irfp_umi_args=(
    -j 30
    -m 16
    --cut 8
    -g "${ACT_INTERNAL_IRFP}"
    --rename '{header} umi={cut_prefix}'
    "./${sample}_Cut2.fastq"
    -o "./${sample}_Cut3.fastq"
  )
  cutadapt "${irfp_umi_args[@]}" > "./${sample}_Cut3-log.txt"
done


# mCherry
#
# Cut the first 8 nt off each read (--cut 8) and move them into the read header
# as "umi=..." (--rename), then trim the mCherry ACT_INTERNAL adapter from the
# 5' end (-g). Input is <sample>Cut2.fastq (no underscore before "Cut2");
# trimmed reads are written to <sample>_Cut3.fastq (-o) and whatever Cutadapt
# prints on standard output is saved to <sample>_Cut3-log.txt.

ACT_INTERNAL_MCHERRY="CATGTCTGGATCCCTGCA"

mcherry_umi_samples=(
  Rep1_0h_mCherry Rep1_2h_mCherry Rep1_5h_mCherry
  Rep2_0h_mCherry Rep2_2h_mCherry Rep2_5h_mCherry
  Rep3_0h_mCherry Rep3_2h_mCherry Rep3_5h_mCherry
)

for sample in "${mcherry_umi_samples[@]}"; do
  mcherry_umi_args=(
    -j 30
    -m 16
    --cut 8
    -g "${ACT_INTERNAL_MCHERRY}"
    --rename '{header} umi={cut_prefix}'
    "./${sample}Cut2.fastq"
    -o "./${sample}_Cut3.fastq"
  )
  cutadapt "${mcherry_umi_args[@]}" > "./${sample}_Cut3-log.txt"
done

# Collapse highly similar barcodes together before UMI de-duplication,
# perform UMI-based de-duplication,
# and create a final count table to give a count of distinct UMIs.
# Also creates a file named -umi-dedup.txt with UMI de-duplication statistics.
#
# For every sample, bc-umi is given <sample>_Cut3.fastq (-f), the prefix
# <sample>_barcode for both -n and -d, and <sample>_barcode-count.txt (-o).

# NOTE(review): this path spells the checkout directory "barcode-assign"
# (hyphen), while the bc-tabulate calls further down in this script use
# "barcode_assign" (underscore). Only one spelling can match the directory on
# disk -- confirm which one is right and align the two.
BC_UMI='../../barcode-assign/target/debug/bc-umi'

# Every library that went through UMI extraction: all nine iRFP samples followed
# by all nine mCherry samples. Keep this list complete -- a sample missing here
# never gets a <sample>_barcode-count.txt and silently drops out of the tables.
bc_umi_samples=(
  Rep1_0h_iRFP Rep1_2h_iRFP Rep1_5h_iRFP
  Rep2_0h_iRFP Rep2_2h_iRFP Rep2_5h_iRFP
  Rep3_0h_iRFP Rep3_2h_iRFP Rep3_5h_iRFP
  Rep1_0h_mCherry Rep1_2h_mCherry Rep1_5h_mCherry
  Rep2_0h_mCherry Rep2_2h_mCherry Rep2_5h_mCherry
  Rep3_0h_mCherry Rep3_2h_mCherry Rep3_5h_mCherry
)

for sample in "${bc_umi_samples[@]}"; do
  "${BC_UMI}" -f "./${sample}_Cut3.fastq" \
    -n "./${sample}_barcode" \
    -d "./${sample}_barcode" \
    -o "./${sample}_barcode-count.txt"
done

# Merge the per-sample barcode count files into one table per reporter
# (iRFP, mCherry) with bc-tabulate.

# NOTE(review): this path spells the checkout directory "barcode_assign"
# (underscore), while the bc-umi call earlier in this script uses
# "barcode-assign" (hyphen). Only one spelling can match the directory on
# disk -- confirm which one is right and align the two.
BC_TABULATE='../../barcode_assign/target/debug/bc-tabulate'

COUNTDIR='./'

#######################################
# Run bc-tabulate on a set of count files.
# Globals:   BC_TABULATE (read)
# Arguments: $1 - path of the table to write (passed to bc-tabulate as -o)
#            $2... - input count files
# Outputs:   an error message on stderr when there is nothing to tabulate
# Returns:   1, without running bc-tabulate, when no input is given or the
#            first input does not exist (an unmatched glob is left as its
#            literal pattern); otherwise the exit status of bc-tabulate
#######################################
tabulate_counts() {
  local table=$1
  shift
  if (( $# == 0 )) || [[ ! -e $1 ]]; then
    printf 'error: no barcode count files found for %s\n' "${table}" >&2
    return 1
  fi
  "${BC_TABULATE}" "$@" -o "${table}"
}

# NOTE(review): the patterns below pick up every file in COUNTDIR whose name
# contains the reporter name and ends in "count.txt". If bc-umi writes any other
# *count.txt file next to <sample>_barcode-count.txt it would be tabulated too --
# confirm against the bc-umi outputs.

#iRFP count table

irfp_count_files=("${COUNTDIR}"*iRFP*count.txt)
# Space-joined copy, kept so COUNTS_IRFP still holds the same string as before.
COUNTS_IRFP="${irfp_count_files[*]}"

tabulate_counts "${COUNTDIR}NIJK04_iRFP_bccounts_final.txt" "${irfp_count_files[@]}"

#mCherry count table

mcherry_count_files=("${COUNTDIR}"*mCherry*count.txt)
# Space-joined copy, kept so COUNTS_MCHERRY still holds the same string as before.
COUNTS_MCHERRY="${mcherry_count_files[*]}"

tabulate_counts "${COUNTDIR}NIJK04_mCherry_bccounts_final.txt" "${mcherry_count_files[@]}"
