$bwaversion = "bwa-0.7.12"; $samtoolsversion = "samtools-1.3"; if (@ARGV != 2){ print "USAGE: script.pl \n"; exit; } $tmpfa = $ARGV[1]; $tmpfan = length $tmpfa; $f3 = $tmpfan-3; $f6 = $tmpfan-6; $tmp0 = substr $tmpfa,$f3; $tmp1 = substr $tmpfa,$f6; if (!(($tmp0 eq ".fa") or ($tmp1 eq ".fasta"))){ print "USAGE: script.pl <0> <0>\n"; print "Extention of fasta file is \".fa\" or \".fasta\".\n"; exit; } $input = $ARGV[0]; $ref_file = $ARGV[1]; ########### for analysis using bwasw ######################################### print "analyzing .....\n"; system "/home/genome/$samtoolsversion/samtools faidx $ref_file"; $fai = $ref_file.".fai"; system "/home/genome/$bwaversion/bwa index -a is $ref_file"; system "/home/genome/$bwaversion/bwa mem $ref_file $input > tmp.sam"; ############################################################################################ ####### remove reads with long soft-clipped regions ############################# open (INTMP, "tmp.sam"); open (OUTTMP, ">ftmp.sam"); open (OUTTMP2, ">ShortTarget.sam"); while (){ $soft = 0; $match = 0; $m_rate = 0; @t = split /\t/,$_; if ($t[4] > 0){ @cig = (); $i = $t[5]; for ($j=0;$j<100;$j++){ if ($i =~ /^(\d+\D+)(.*)/){ push @cig,$1; $i = $2; } else { $j = 100; } } ####### ignore insertions for length-counting ##################################### foreach $i (@cig){ if ($i =~ /(\d+)S/){ $soft += $1; } if ($i =~ /(\d+)M/){ $match += $1; } if ($i =~ /(\d+)D/){ $match += $1; } } ############################################ $matchsoft = $match + $soft; if ($matchsoft != 0){ if ($match >= 20){ print OUTTMP "$_"; } else { print OUTTMP2 "$_"; } } } } close (INTMP); close (OUTTMP); $bampfx = $input.".sorted"; $bam = $bampfx.".bam"; $sam = $bampfx.".sam"; system "/home/genome/$samtoolsversion/samtools view -bt $fai ftmp.sam |/home/genome/$samtoolsversion/samtools sort - >$bam"; system "/home/genome/$samtoolsversion/samtools index $bam"; system "/home/genome/$samtoolsversion/samtools view -h $bam > $sam"; unlink <*tmp.sam>; unlink <*.amb>; unlink <*.ann>; unlink <*.rbwt>; unlink <*.rsa>; unlink <*.bwt>; unlink <*.pac>; unlink <*.rpac>; unlink <*.sa>; print "Mapped $input on $ref_file by BWASW.\n"; exit;