#!usr/bin/perl use strict; use warnings; my $cpu = 4; my $outdir = "/project/gccri/CPRIT_PDX/RNA"; my $star_h = "/home/hef/Data/hg38/star"; my $star_m = "/home/hef/Data/mm10/star"; my $ref_h = "/home/hef/Data/hg38/hg38.fa"; my $ref_m = "/home/hef/Data/mm10/mm10.fa"; my $gtf_h = "/home/hef/Data/hg38/optimist_ref/gencodev22/gencode.v22.annotation.gtf"; my $gtf_m = "/home/hef/Data/mm10/gencode.vM19.annotation.gtf"; my $gff_h = "/home/hef/Data/hg38/optimist_ref/gencodev22/gencode.v22.annotation.gff3"; my $rnaseqc_gtf = "/home/hef/Data/hg38/gencode.v22.genes.gtf"; my $kallisto_h = "/home/hef/Data/hg38/optimist_ref/gencodev22/gencode.v22.all_transcripts.fa.idx"; my $ctat_lib_dir = "/home/hef/Data/hg38/optimist_ref/GRCh38_gencode_v22.star-fusion.v1.10"; my $pwd = `pwd`; chomp $pwd; `mkdir -p $outdir/0.disambiguate $outdir/1.trim $outdir/1.trim/trimmed_fq $outdir/1.trim/Final_fq $outdir/2.exp $outdir/2.exp/1.kallisto $outdir/3.fusion $outdir/3.fusion/1.star_fusion $outdir/3.fusion/2.PRADA $outdir/2.exp/2.RSEM $outdir/2.exp/3.htseq $outdir/4.rnaseqc`; `mkdir -p $pwd/script_sh.rna`; open OT, ">trimmed_fq.rna.ls" or die $!; open IN,"<$ARGV[0]" or die $!; while(<IN>){ chomp; my ($id, $fq1, $fq2) = split /\t/; open SH, ">script_sh.rna/$id.rna.sh" or die $!; print SH "trim_galore --phred33 --fastqc --length 50 -q 20 --gzip -o $outdir/1.trim/trimmed_fq --paired $fq1 $fq2\n"; $fq1 = "$outdir/1.trim/trimmed_fq/$id*val_1.fq.gz"; $fq2 = "$outdir/1.trim/trimmed_fq/$id*val_2.fq.gz"; if ($id =~ /PDX/){ #map to human print SH "STAR --runThreadN $cpu --genomeDir $star_h --sjdbGTFfile $gtf_h --sjdbOverhang 100 --readFilesIn $fq1 $fq2 --outFileNamePrefix $outdir/0.disambiguate/$id\.human. --outSAMtype BAM Unsorted --twopassMode Basic --outSAMattributes All --genomeLoad NoSharedMemory --readFilesCommand zcat --outReadsUnmapped Fastx --outSAMunmapped Within; samtools sort -m 3G -@ $cpu -o $outdir/0.disambiguate/$id\.human.sort.bam -n $outdir/0.disambiguate/$id\.human.Aligned.out.bam; /bin/rm -rf $outdir/0.disambiguate/$id\.human.Aligned.out.bam $outdir/0.disambiguate/$id\.human.*STAR* $outdir/0.disambiguate/$id\.human.Log* $outdir/0.disambiguate/$id\.human.*tab\n"; #map to mouse print SH "STAR --runThreadN $cpu --genomeDir $star_m --sjdbGTFfile $gtf_m --sjdbOverhang 100 --readFilesIn $fq1 $fq2 --outFileNamePrefix $outdir/0.disambiguate/$id\.mouse. --outSAMtype BAM Unsorted --twopassMode Basic --outSAMattributes All --genomeLoad NoSharedMemory --readFilesCommand zcat --outReadsUnmapped Fastx --outSAMunmapped Within; samtools sort -m 3G -@ $cpu -o $outdir/0.disambiguate/$id\.mouse.sort.bam -n $outdir/0.disambiguate/$id\.mouse.Aligned.out.bam; /bin/rm -rf $outdir/0.disambiguate/$id\.mouse.Aligned.out.bam $outdir/0.disambiguate/$id\.mouse.*STAR* $outdir/0.disambiguate/$id\.mouse*out*\n"; #convert to fq print SH "disambiguate -s $id -o $outdir/0.disambiguate -a star $outdir/0.disambiguate/$id\.human.sort.bam $outdir/0.disambiguate/$id\.mouse.sort.bam;samtools merge $outdir/0.disambiguate/$id\.disam.merge.bam $outdir/0.disambiguate/$id\.disambiguatedSpeciesA.bam $outdir/0.disambiguate/$id\.ambiguousSpeciesA.bam $outdir/0.disambiguate/$id\.ambiguousSpeciesB.bam; samtools sort -m 3G -@ $cpu -o $outdir/0.disambiguate/$id\.disam.sortbyname.bam -n $outdir/0.disambiguate/$id\.disam.merge.bam\n"; print SH "samtools fastq $outdir/0.disambiguate/$id\.disam.sortbyname.bam -1 $outdir/0.disambiguate/$id\.R1.gz -2 $outdir/0.disambiguate/$id\.R2.gz -0 /dev/null -s /dev/null -n -F 0x900; /bin/rm -rf $outdir/0.disambiguate/$id\.human*bam $outdir/0.disambiguate/$id\.mouse* $outdir/0.disambiguate/$id\.*bam $outdir/0.disambiguate/$id\.*log\n"; #merge with unmapped reads print SH "gunzip -c $outdir/0.disambiguate/$id\.R1.gz > $outdir/1.trim/Final_fq/$id\.R1.fq;gunzip -c $outdir/0.disambiguate/$id\.R2.gz > $outdir/1.trim/Final_fq/$id\.R2.fq\n"; print SH "cat $outdir/1.trim/Final_fq/$id\.R1.fq $outdir/0.disambiguate/$id\.human.Unmapped.out.mate1 > $outdir/1.trim/Final_fq/$id\.R1.temp; mv $outdir/1.trim/Final_fq/$id\.R1.temp $outdir/1.trim/Final_fq/$id\.R1.fq; cat $outdir/1.trim/Final_fq/$id\.R2.fq $outdir/0.disambiguate/$id\.human.Unmapped.out.mate2 > $outdir/1.trim/Final_fq/$id\.R2.temp; mv $outdir/1.trim/Final_fq/$id\.R2.temp $outdir/1.trim/Final_fq/$id\.R2.fq;/bin/rm -rf $outdir/0.disambiguate/$id\.R1.gz $outdir/0.disambiguate/$id\.R2.gz\n"; } else{ print SH "gunzip -c $fq1 > $outdir/1.trim/Final_fq/$id\.R1.fq; gunzip -c $fq2> $outdir/1.trim/Final_fq/$id\.R2.fq\n"; } $fq1 = "$outdir/1.trim/Final_fq/$id\.R1.fq"; $fq2 = "$outdir/1.trim/Final_fq/$id\.R2.fq"; print OT "$id\t$fq1\t$fq2\n"; #kallisto print SH "kallisto quant -i $kallisto_h -o $outdir/2.exp/1.kallisto/$id -t $cpu --plaintext $fq1 $fq2\n"; #STAR-Fusion print SH "/home/hef/Tools/STAR-Fusion-v1.10.0/STAR-Fusion --genome_lib_dir $ctat_lib_dir --left_fq $fq1 --right_fq $fq2 --FusionInspector validate --examine_coding_effect --CPU $cpu --output_dir $outdir/3.fusion/1.star_fusion/$id;/bin/rm -rf $outdir/3.fusion/1.star_fusion/$id/*preliminary*\n"; #PRADA print SH "source /home/hef/Tools/miniconda3/etc/profile.d/conda.sh;conda activate prada\n"; print SH "python /home/hef/Tools/PRADA2-master/prada2.py --read1 $fq1 --read2 $fq2 --outdir $outdir/3.fusion/2.PRADA\n"; print SH "python /home/hef/Tools/PRADA2-master/prada2.py --read1 $fq1 --read2 $fq2 --outdir $outdir/3.fusion/2.PRADA --fusion\n"; print SH "conda deactivate\n"; print SH "source /home/hef/Tools/miniconda3/etc/profile.d/conda.sh;conda activate py2\n"; print SH "python /home/hef/Tools/PRADA2-master/prada2.py --read1 $fq1 --read2 $fq2 --outdir $outdir/3.fusion/2.PRADA --rsem\n"; print SH "conda deactivate\n"; print SH "ln -s $outdir/3.fusion/2.PRADA/$id/rsem_results/rsem.genes.results $outdir/2.exp/2.RSEM/$id.genes.results ; ln -s $outdir/3.fusion/2.PRADA/$id/rsem_results/rsem.isoforms.results $outdir/2.exp/2.RSEM/$id.isoforms.results;/bin/rm -rf $outdir/3.fusion/2.PRADA/$id/rsem_results/*bam $outdir/3.fusion/2.PRADA/$id/bam_results\n"; #HTseq print SH "samtools sort -m 2G -@ $cpu -o $outdir/2.exp/3.htseq/$id.sorted.bam $outdir/3.fusion/1.star_fusion/$id/Aligned.out.bam; samtools index $outdir/2.exp/3.htseq/$id\.sorted.bam;/bin/rm -rf $outdir/3.fusion/1.star_fusion/$id/Aligned.out.bam\n"; print SH "htseq-count -s no -f bam -r pos -n $cpu -t exon -i ID -m union --nonunique all $outdir/2.exp/3.htseq/$id\.sorted.bam $gff_h --additional-attr=gene_id --additional-attr=gene_name --additional-attr=transcript_id --additional-attr=exon_number > $outdir/2.exp/3.htseq/$id\.htseq.txt\n"; #RNAseQC print SH "rnaseqc $rnaseqc_gtf $outdir/2.exp/3.htseq/$id\.sorted.bam --coverage $outdir/4.rnaseqc/$id\n"; print SH "gzip $outdir/1.trim/Final_fq/$id\.R1.fq; gzip $outdir/1.trim/Final_fq/$id\.R2.fq\n" }