#!/usr/bin/perl -w #MicroC_analysis_pipeline_Part1 use strict; use Cwd; use Cwd 'abs_path'; #use Data::Dumper; use Getopt::Long; use Pod::Usage; use File::Basename; use lib dirname (__FILE__); use MCC; =head1 NAME MicroC-analysis_pipeline_part1.pl =head1 SYNOPSIS Dependencies: Trim_galore FLASh BLAT Bowtie2 Samtools MACS2 WigToBigWig gunzip =cut # Specifications my $pipescript_path=abs_path($0); my ($pipescript_filename, $pipescript_directory) = fileparse($pipescript_path); # Strings my $path = getcwd; my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(); my $help=0; my $man=0; my $config=0; my $stringent = 0; my $input_file="MicroC_config.txt"; # Arrays my @files; my %param; MCC::default_parameters(\%param); # The GetOptions from the command line &GetOptions ( "i=s"=>\ $input_file, "f=s"=>\ @files, "q"=>\ $param{"qsub"}, "genome=s"=>\ $param{"genome"}, "pf=s"=>\ $param{"public_folder"}, "pu=s"=>\ $param{"public_url"}, 'h|help'=>\$help, 'man'=>\$man, 'config'=>\$config, 'combine'=>\$param{"combine"}, ); pod2usage(1) if $help; pod2usage(-verbose=>2) if $man; pod2usage(2) unless ($input_file); if ($config ==1){MCC::config_file()} load_config_file("$path/$input_file", \%param, \@files); #Create a subdirectory in the public folder $param{"public_folder_expt"} = $param{"public_folder"}."/".$param{"reference"}; if (-d $param{"public_folder"}){} else {mkdir $param{"public_folder"}."/".$param{"reference"}}; system ("test -e log.txt && rm log.txt"); MCC::print_parameters(\%param); foreach my $filename (@files) { print "Writing tmp.sh for $filename\n"; open TMP, ">tmp.sh" or die "Couldn't open input file tmp.sh"; print TMP "#!/bin/bash #SBATCH -p em #SBATCH -t 2-00:00:00 #SBATCH -o out.txt #SBATCH -n 12 #SBATCH -N 1 #SBATCH --mail-type END set -e set -u set -o pipefail # Script started at $hour:$min:$sec $mday/$mon/$year # Filename $filename\n"; MCC::output_parameters(\%param, \*TMP); ######################################################################## my $output_path= "$path/".$param{'reference'}.$filename; if (-d $output_path){} else {mkdir $output_path}; if (($param{"clean_wig"} =~ /^[Yy1]/) and ($param{"qsub"} =~ /^[Yy1]/)) { chdir($output_path); system("rm *.wig -rf"); chdir($path); } # Unzip files if ($param{"gunzip"} =~ /^[Yy1]/) { if (-e "$filename\_R1.fq.gz"){print TMP "mv $filename\_R1.fq.gz $filename\_R1.fastq.gz\n"} if (-e "$filename\_R2.fq.gz"){print TMP "mv $filename\_R2.fq.gz $filename\_R2.fastq.gz\n"} if (-e "$filename\_R1.fastq.gz"){print TMP "gunzip $filename\_R1.fastq.gz\n"} if (-e "$filename\_R2.fastq.gz"){print TMP "gunzip $filename\_R2.fastq.gz\n"} if (-e "$filename\_ext.fastq.gz"){print TMP "gunzip $filename\_ext.fastq.gz\n"} if (-e "$filename\_ext.fa.gz"){print TMP "gunzip $filename\_ext.fa.gz\n"} if (-e "$filename\_ext.psl.gz"){print TMP "gunzip $filename\_ext.psl.gz\n"} print TMP 'if wait; then echo "'."$filename gunzip completed successfully".'">>log.txt;fi'."\n"; } # Trim reads if ($param{"trim_galore"} =~ /^[Yy1]/) { print TMP " module load trimgalore/0.4.1 test -e $filename\_R1.fastq && trim_galore --paired $filename\_R1.fastq $filename\_R2.fastq || exit 0\n"; print TMP 'if wait; then echo "trim_galore completed successfully">>log.txt;fi'."\n"; } # Merge the paired end reads into single end reads if ($param{"flash"} =~ /^[Yy1]/) { print TMP "module load flash test -e $filename\_R1_val_1.fq && flash $filename\_R1_val_1.fq $filename\_R2_val_2.fq -o $filename || exit 0\n"; print TMP 'if wait; then echo "flash completed">>log.txt;fi'."\n"; print TMP "mv $filename.extendedFrags.fastq $filename\_ext.fastq\n"; if ($param{"gzip"} eq "Y") { if (-e "$filename\_R1.fastq"){print TMP "gzip $filename\_R1.fastq\n"}; if (-e "$filename\_R2.fastq"){print TMP "gzip $filename\_R2.fastq\n"}; } print TMP 'if wait; then echo "'."$filename flash completed successfully".'">>log.txt;fi'."\n"; } # Convert fastq to fa if ($param{"fq2fa"} =~ /^[Yy1]/) { print TMP "test -e $filename\_ext.fastq && sed -n '1~4s/^@/>/p;2~4p' $filename\_ext.fastq > $filename\_ext.fa || exit 0\n"; print TMP 'if wait; then echo "'."$filename FASTA completed successfully".'">>log.txt;fi'."\n"; } # BLAT if ($param{"blat"} =~ /^[Yy1]/) { print TMP "module load ucsc/20160601\nblat -minScore=20 -minIdentity=5 -maxIntron=10000 -tileSize=11 $param{'oligo_file'} $filename\_ext.fa $filename\_ext.psl\n"; print TMP 'if wait; then echo "BLAT completed successfully">>log.txt;fi'."\n"; } # Split file into multiple files if (($param{"splitter"} =~ /^[Yy1]/) or ($param{"MNase_multi"} =~ /^[Yy1]/)) { print TMP "perl $param{'master_folder'}/splitter.pl -f $filename\_ext.fastq -p $filename\_ext.psl -r $param{'reference'}$filename -limit $param{'MNase_Multi_limit'} -all\n"; print TMP 'if wait; then echo "'."$filename splitter completed successfully".'">>log.txt;fi'."\n"; } #Gzip to zip the fastq and other files if you have finished using them - it also deletes intermediate files if ($param{"gzip"} =~ /^[Yy1]/) { if (-e "$filename\_R1.fastq"){print TMP "gunzip $filename\_R1.fastq\n"} if (-e "$filename\_R2.fastq"){print TMP "gunzip $filename\_R2.fastq\n"} } if (($param{"Align_pipe"} =~ /^[Yy1]/) or ($param{"Pipe2"} =~ /^[Yy1]/)) { print TMP "perl $param{'master_folder'}/MicroC-analysis_pipeline_part2.pl -p $path/$param{'reference'}$filename -n $param{'reference'}$filename -i $path/$input_file -q\n"; print TMP 'if wait; then echo "'."######################################################################################### $filename Pipe2 started".'">>log.txt;fi'."\n"; } close TMP; # qsub the command to sun grid engine. if ($param{"qsub"} =~ /^[Yy1]/) { MCC::submit("tmp.sh", $filename); # Make a log file system ('cat tmp.sh >> tmplog.txt echo " ######################################################################################### ">>tmplog.txt') } } sub load_config_file { my ($config_file, $param, $files) = @_; open (FH, $config_file) or die "Couldn't open input file $config_file"; while (my $line = ) { chomp $line; if ($line =~ /^#/){} elsif ($line =~ /^\s*$/){} elsif (($line =~ /^perl/) or ($line =~ /^exit/)){} elsif (($line =~ /(.*)\s*=\s*(.*)/) or ($line =~ /(.*)\s*=\s*(.*)\s*#.*/)) { if ($1 eq "file"){push @$files, $2} else{$$param{$1}=$2} } else {print "line of input file not read: $line\n"} } }