#!/usr/bin/perl -w
use strict;
use File::Basename;
use File::Path qw{remove_tree};
use File::Find;
use Getopt::Long qw(GetOptions);
use Bio::SeqIO;
use Bio::Seq;
use Cwd qw{abs_path};
use Benchmark;
use Pod::Usage qw(pod2usage);

################## initiate variables
# Defaults for the command-line options; GetOptions below overwrites them in place.
my $max_insert_size=1000;	# read pairs with |insert size| above this are filtered out
my $format='fastq';			# file extension of the input reads
my $project_id='081104';	# prefix expected in front of the sample id in read file names
my $nthreads=12;
my $nct=3;
my $mem='64g';
my $picard_uppmax='/sw/apps/bioinfo/picard/1.141/milou/picard.jar'; ### latest version support java 1.7
my $gatk_uppmax='/sw/apps/bioinfo/GATK/3.5.0/GenomeAnalysisTK.jar';
my $ploid=2;
my $hard_filter='';			# parsed for compatibility; not referenced by the pipeline code in this file
my $help=0;
my $tmp_dir='';
my $postfix='';				# only referenced by the commented-out CombineGVCFs step
my ($source_dir, $work_dir, $ref_full, $ref_capture);

if ((@ARGV==0) && (-t STDIN)) {
	 pod2usage(-verbose => 0, -message =>  "$0: Argument required.\n");
}
# Keep the original command line for the log before GetOptions consumes @ARGV.
my $command_line=$0.' '.join(" ",@ARGV);

# Every destination must be a reference: a plain scalar after an option spec is
# taken by Getopt::Long as another option name and the value given by the user is lost.
# --proj is parsed as a string because an integer option is numified, which would
# turn an id such as 081104 into 81104 and break the file-name match.
GetOptions('in=s'		=> \$source_dir,
			'out=s'		=> \$work_dir,
			'ref=s'		=> \$ref_full,
			'expref=s'	=> \$ref_capture,
			'size=s'	=> \$max_insert_size,
			'format=s' 	=> \$format,
			'proj=s'	=> \$project_id,
			'threads=i' => \$nthreads,
			'nct=i'		=> \$nct,
			'memory=s'	=> \$mem,
			'post=s'	=> \$postfix,
			'picard=s'	=> \$picard_uppmax,
			'gatk=s'	=> \$gatk_uppmax,
			'tmp=s'		=> \$tmp_dir,  # TEMP=${SNIC_TMP:-.} for uppmax
			'filter=s'	=> \$hard_filter,
			'd=i'		=> \$ploid,
			'help|?'	=> \$help) or pod2usage(-msg => 'Wrong options', -verbose => 1);

pod2usage(-verbose=>2) if $help;
pod2usage(-msg => 'Please give a directory name for sequence files', -verbose=>2) unless($source_dir);
pod2usage(-msg => 'Please give a genome for bwa mapping', -verbose=>2) unless($ref_full);
pod2usage(-msg => 'Please give a genome for capture positions', -verbose=>2) unless($ref_capture);
pod2usage(-msg => 'Please give a name for output  directory', -verbose=>2) unless($work_dir);
# The number of parallel HaplotypeCaller jobs is threads/nct, so both must be positive.
pod2usage(-msg => 'threads and nct must be positive integers', -verbose=>1) unless($nthreads > 0 && $nct > 0);

print STDERR "\n####################################################################################################\n";
printf STDERR "%40s%20s\n","ANALYSIS","STARTED!";
print STDERR "####################################################################################################\n\n";
print STDERR "$command_line\n\n";

my $start_run=Benchmark->new;

# Create the output directory and, unless one was given, a tmp directory inside it.
unless(-d $work_dir){
	mkdir $work_dir or die "failed to create working directory $work_dir: $!";
}
if(!defined $tmp_dir or $tmp_dir eq ''){
	$tmp_dir=$work_dir.'/tmp';
}
unless(-d $tmp_dir){
	mkdir $tmp_dir or die "failed to create tmp directory $tmp_dir: $!";
}

# Only the first digit of the samtools version is needed: it selects the
# "samtools sort" command-line syntax used for mapping (0.x versus 1.x).
my $samtools_version=`samtools 2>&1 >/dev/null | grep 'Version' `;
$samtools_version='' unless(defined $samtools_version);
my ($samtools_verNum)=$samtools_version=~/Version:\s+(\d)/;
# The capture is undefined when samtools is missing; test that explicitly because
# an undefined value compares equal to 0 and would pass the version check.
die "No samtools found\n" unless(defined $samtools_verNum && ($samtools_verNum==0 || $samtools_verNum==1));
print STDERR "samtools version $samtools_verNum found\n";

### index genome
index_mapping_genome($ref_full);
my $bed_file=index_capture_genome($ref_capture);

# Group the read files as $samples{sample id}{R1|R2}{full file name}.
my @files=get_filenames_recursive($source_dir, $format);
print STDERR scalar @files, " $format files detected in the folder $source_dir\n";
my %samples=();
foreach my $file_full_name (@files){
	my ($filename)=fileparse($file_full_name, qw{.fastq .fastq.gz .fasta .fasta.gz});
	# The project id is matched literally (\Q..\E); the sample id looks like P<n>_W<A-H><nn>
	# and the mate id is the last R1/R2 token enclosed by '-' or '_'.
	my ($sample_id, $mate_id)=$filename=~/\Q$project_id\E[\-\_](P\d+[\-\_]W[A-H]\d{1,2})\S*[\-\_](R[12])[\-\_]/;
	next unless(defined $sample_id and defined $mate_id);
	$samples{$sample_id}->{$mate_id}{$file_full_name}=1;
}
#
# Per-sample pipeline: merge reads by mate, map with bwa, filter by flag / mate
# chromosome / insert size, keep reads on the capture reference, mark duplicates,
# realign indels, call gVCF parts in parallel, concatenate them and remove the
# intermediate files. Most steps are skipped when their output file already exists.
foreach my $sample_id (sort keys %samples){
	my $sample_start=Benchmark->new;
	print STDERR "Processing $sample_id \n";
	########### combined mate1 and mate2
	my @mate1_files=sort keys %{$samples{$sample_id}->{'R1'} || {}};
	my $num_mate2=scalar keys %{$samples{$sample_id}->{'R2'} || {}};
	if(@mate1_files != $num_mate2 || @mate1_files==0){
		die "Problem in sequence naming of $sample_id: ".scalar(@mate1_files)." files found in mate1 and $num_mate2 files found in mate2\n";
	}
	# Pair every mate1 file with its mate2 file in the same order. Only the last
	# R1 token of the base name is swapped (the token the sample regex matched),
	# so directory names are never rewritten and both '-' and '_' separators work.
	my @mate2_files=();
	foreach my $mate1_file (@mate1_files){
		my ($mate1_name, $mate1_dir)=fileparse($mate1_file);
		my $mate2_name=$mate1_name;
		$mate2_name=~s/^(.*[\-\_])R1([\-\_])/${1}R2${2}/s;
		my $mate2_file=$mate1_dir.$mate2_name;
		die "Cannot find the mate2 file $mate2_file for $mate1_file\n" unless(exists $samples{$sample_id}->{'R2'}{$mate2_file});
		push @mate2_files, $mate2_file;
	}
	my $sample_mate1=join(' ', @mate1_files);
	my $sample_mate2=join(' ', @mate2_files);
	my $file_mate1 = $work_dir.'/'.$sample_id.'_R1.fastq.gz';
	my $file_mate2 = $work_dir.'/'.$sample_id.'_R2.fastq.gz';
	my ($cat_cmd1,$cat_cmd2)=('cat', 'cat');

	if($sample_mate1=~/\.gz/){
		$cat_cmd1='zcat';
	}
	if($sample_mate2=~/\.gz/){
		$cat_cmd2='zcat';
	}
	$cat_cmd1.=" $sample_mate1 | gzip -c > $file_mate1";
	$cat_cmd2.=" $sample_mate2 | gzip -c > $file_mate2";
	my @cat_cmds=();
	push @cat_cmds, $cat_cmd1 unless(-e $file_mate1);
	push @cat_cmds, $cat_cmd2 unless(-e $file_mate2);
	if(@cat_cmds){
		print STDERR "\tConcatenate reads by mate\n\t\t",join("\n\t\t", @cat_cmds),"\n";
		parallele_machine_by_splice(\@cat_cmds, 2, \&run_cmd);
	}
	my $mid_run=Benchmark->new;
	print STDERR "\nIt took ", timestr(timediff($mid_run,$sample_start))," to merge zip files\n";

	#### mapping
	print STDERR "\tBWA mapping\n";
	my $bam_file=$work_dir.'/'.$sample_id.'_srt.bam';
	my $bwa_cmd="bwa mem -P -t $nthreads -R \"\@RG\\tID:$sample_id\\tSM:$sample_id\"  $ref_full $file_mate1 $file_mate2 |";
	if($samtools_verNum==1){
		# A per-sample prefix inside the tmp directory keeps the sort chunks of
		# concurrently running samples from overwriting each other.
		$bwa_cmd.=" samtools view -b - | samtools sort -T $tmp_dir/$sample_id -m $mem - > $bam_file";
	}else{
		my $bam_file_prefix=$work_dir.'/'.$sample_id.'_srt';
		$bwa_cmd.=" samtools view -Sb - | samtools sort - $bam_file_prefix"; # old samtools you can ignore the warning message about the truncated file
	}

	unless(-e $bam_file){
		print STDERR "\t\t$bwa_cmd\n";
		system($bwa_cmd) == 0 or die "system $bwa_cmd failed: $?";
	}
	my $mid_run_start=$mid_run;
	$mid_run=Benchmark->new;
	print STDERR "\nBWA mapping and BAM sorting took ", timestr(timediff($mid_run,$mid_run_start)),"\n";

	#### filter chr, insert size
	my $filter_bam_file=$bam_file;
	$filter_bam_file=~s/\.bam$/_filtChrIS\.bam/;
	unless(-e $filter_bam_file){
		print STDERR "\tFiltering bam by insert size\n";
		open(my $sam_in, '-|', "samtools view -h $bam_file") or die "Failed to pipe from samtools: $!";
		open(my $bam_out, '|-', "samtools view -Sb - > $filter_bam_file") or die "Fail to pipe to samtools: $!";
		while(my $record=<$sam_in>){
			unless($record=~/^@/){
				# Columns 2, 7 and 9 of a SAM record: FLAG, mate reference name, template length.
				my ($flag, $mate_chr,$insert_size)=(split /\s+/, $record, 10)[1, 6, 8];# mapq will be applied by GATK
				# Drop secondary (256) and unmapped (4) reads, pairs on different
				# references and pairs further apart than the maximum insert size.
				next if($flag & 256 || $flag & 4 || $mate_chr ne '=' || $insert_size > $max_insert_size || $insert_size < - $max_insert_size);
			}
			print $bam_out $record;
		}
		# close() on a pipe reports the exit status of the command; a failed
		# samtools would otherwise leave a truncated BAM that later runs reuse.
		my $read_ok=close($sam_in);
		my $write_ok=close($bam_out);
		unless($read_ok && $write_ok){
			unlink $filter_bam_file;
			die "Filtering $bam_file into $filter_bam_file failed: samtools exited with an error\n";
		}
	}
	$mid_run_start=$mid_run;
	$mid_run=Benchmark->new;
	print STDERR "\nFiltering chr and insert size took ", timestr(timediff($mid_run,$mid_run_start)),"\n";

	########################## keep only those in exome capture ref
	my $exome_capture_bam=$filter_bam_file;
	$exome_capture_bam=~s/\.bam$/_exCap.bam/;
	# The @SQ header lines of the mapping genome are removed and replaced by the
	# sequences listed in the capture reference index (-t <ref>.fai).
	my $capture_cmd="samtools view -h -L $bed_file $filter_bam_file | grep -v '^\@SQ' | samtools view -Sb -t ${ref_capture}.fai -  > $exome_capture_bam";
	unless(-e $exome_capture_bam){
		print STDERR "\t\t$capture_cmd\n";
		system($capture_cmd) == 0 or die "system $capture_cmd failed: $?";
	}
	$mid_run_start=$mid_run;
	$mid_run=Benchmark->new;
	print STDERR "\nCapture BAM on BED file took ", timestr(timediff($mid_run,$mid_run_start)),"\n";

	## PICARD remove duplicates
	print STDERR "\tRemoving duplicates\n";
	my $bam_dupRm_file=$exome_capture_bam;
	$bam_dupRm_file=~s/\.bam$/_dupRm\.bam/;
	my $metrix_file=$bam_dupRm_file.'.metrics.txt';
	my $dup_cmd="java -Xmx".$mem." -jar $picard_uppmax MarkDuplicates ASSUME_SORTED=true INPUT=$exome_capture_bam TMP_DIR=$tmp_dir OUTPUT=$bam_dupRm_file METRICS_FILE=$metrix_file VALIDATION_STRINGENCY=LENIENT CREATE_INDEX=true";

	unless(-e $bam_dupRm_file){
		print STDERR "\t\t$dup_cmd\n";
		system($dup_cmd) == 0 or die "system $dup_cmd failed: $?";
	}
	$mid_run_start=$mid_run;
	$mid_run=Benchmark->new;
	print STDERR "\nMarkDuplicates took ", timestr(timediff($mid_run,$mid_run_start)),"\n";

	### GATK indel realign
	print STDERR "\tRealigning\n";
	my $interval_file=$bam_dupRm_file.'.intervals';
	my $interval_cmd="java -Djava.io.tmpdir=$tmp_dir -Xmx".$mem."  -jar $gatk_uppmax -nt $nthreads -I $bam_dupRm_file -R $ref_capture -T RealignerTargetCreator -o $interval_file";
	unless(-e $interval_file){
		print STDERR "\t\t$interval_cmd\n";
		system($interval_cmd)== 0 or die "system $interval_cmd failed: $?";
	}
	my $realign_bam_file=$bam_dupRm_file;
	$realign_bam_file=~s/\.bam$/_realigned\.bam/;
	my $realign_cmd="java -Djava.io.tmpdir=$tmp_dir -Xmx".$mem." -jar $gatk_uppmax -I $bam_dupRm_file -R $ref_capture -T IndelRealigner --targetIntervals $interval_file -o $realign_bam_file";
	unless(-e $realign_bam_file){
		print STDERR "\t\t$realign_cmd\n";
		system($realign_cmd) == 0 or die "system $realign_cmd failed: $?";
	}
	$mid_run_start=$mid_run;
	$mid_run=Benchmark->new;
	print STDERR "\nRealignment took ", timestr(timediff($mid_run,$mid_run_start)),"\n";

	# Report statistics for the BAM file that is named in the log line.
	my $flagstat_cmd="samtools flagstat $realign_bam_file";
	print STDERR "\t\t$flagstat_cmd\n";
	system($flagstat_cmd)==0 or die "system $flagstat_cmd failed: $?";

	unless(-e $realign_bam_file.'.bai'){
		my $bam_index_cmd="samtools index $realign_bam_file";
		system($bam_index_cmd)==0 or warn "system $bam_index_cmd failed: $?\n";
	}
	##### GATK
	print STDERR "\trun HaplotypeCaller\n";
	# Number of HaplotypeCaller processes run side by side: whole jobs only, at least one.
	my $num_child=$nct > 0 ? int($nthreads/$nct) : 1; ## 12 /3 =4
	$num_child=1 if($num_child < 1);
	my ($ref_basename, $ref_path)=fileparse($ref_capture, qw{\.fa});
	my @gatk_interval=map { $ref_path.$ref_basename.'-'.$_.'.intervals' } 1..$num_child;

	# NOTE(review): the interval files are shared by all runs on this reference; files left
	# by a run with a larger threads/nct ratio would be reused here and cover only part of
	# the reference. Remove the old <ref>-<N>.intervals files when changing the ratio.
	create_interval_by_fasta($ref_capture, $num_child) if(grep { !-e $_ } @gatk_interval);
	# Split the java heap between the parallel jobs and keep the unit the user gave.
	my $hap_mem=$mem;
	if($mem=~/^(\d+)([kmg]?)$/i){
		my ($mem_amount, $mem_unit)=($1, $2);
		my $mem_share=int($mem_amount/$num_child);
		$mem_share=1 if($mem_share < 1);
		$hap_mem=$mem_share.$mem_unit;
	}else{
		warn "Cannot split memory setting '$mem' between $num_child jobs, every job gets -Xmx$mem\n";
	}
	parallele_machine_by_splice([@gatk_interval], $num_child, \&haplotypeCaller, $ref_capture, $sample_id, $work_dir, $gatk_uppmax, $tmp_dir, $realign_bam_file, $nct, $ploid, $hap_mem);

	my @gvcf_part_files=map { $work_dir.'/'.$sample_id.'-HC-'.$_.'-p'.$ploid.'.g.vcf' } 1..$num_child;
	foreach my $gvcf_part_file (@gvcf_part_files){
		die "HaplotypeCaller output $gvcf_part_file is missing for $sample_id\n" unless(-e $gvcf_part_file);
	}
	### concatenate gvcf
	my $gvcf_file=$work_dir.'/'.$sample_id.'-HC-p'.$ploid.'.g.vcf';
	my $catGVCF_cmd="java -cp $gatk_uppmax org.broadinstitute.gatk.tools.CatVariants -R $ref_capture -V ".join(" -V ", @gvcf_part_files)." -out  $gvcf_file -assumeSorted";
	system($catGVCF_cmd) == 0 or die "system failed $catGVCF_cmd: $?";

	# my $con_gvcf_cmd='vcf-concat '.join(" ", @gvcf_part_files)." > $gvcf_file";
	# system($con_gvcf_cmd) == 0 or die "system failed $con_gvcf_cmd: $?";



	# my $hap_caller_file=$work_dir.'/'.$sample_id.'-HC-p'.$ploid.'.g.vcf';
	# my $hap_caller_cmd="java -Djava.io.tmpdir=$tmp_dir -Xmx".$mem." -jar $gatk_uppmax -nct $nthreads -ploidy $ploid --emitRefConfidence GVCF -I $realign_bam_file  -R $ref_capture -T HaplotypeCaller -o $hap_caller_file -gt_mode DISCOVERY";
	# print STDERR "\t\t$hap_caller_cmd\n";
	# system($hap_caller_cmd) == 0 or die "system $hap_caller_cmd failed: $?";
	$mid_run_start=$mid_run;
	$mid_run=Benchmark->new;
	print STDERR "\nSNP calling took ", timestr(timediff($mid_run,$mid_run_start)),"\n";

	### clean
	# presumably Picard's CREATE_INDEX names the index <name>.bai rather than
	# <name>.bam.bai - TODO confirm; unlink silently skips files that do not exist.
	my $bam_dupRm_index=$bam_dupRm_file;
	$bam_dupRm_index=~s/\.bam$/.bai/;
	unlink $bam_dupRm_file, $bam_dupRm_file.'.bai', $bam_dupRm_index, $bam_file, $filter_bam_file,$filter_bam_file.'.bai', $file_mate1, $file_mate2, $interval_file, $metrix_file, $exome_capture_bam;
	unlink @gvcf_part_files;
	$mid_run=Benchmark->new;
	print STDERR "\nThe sample $sample_id took ", timestr(timediff($mid_run,$sample_start))," in total\n";
}

# print STDERR "Combine all GVCF together\n";
# if($postfix eq ''){
# 	my @sample_ids=sort keys %samples;
# 	my %plate_id=();
# 	foreach (@sample_ids){
# 		my $plate_id=(split/\-|\_/)[0];
# 		$plate_id{$plate_id}=1;
# 	}
# 	$postfix=join('-', sort keys %plate_id);
# }
# my $combined_gvcf=$work_dir.'/combined_'.$postfix.'.g.vcf';
#
# my @gvcf_files=get_filenames_recursive($work_dir, 'g.vcf');
# my $combineGVCF_cmd="java -Xmx".$mem." -jar $gatk_uppmax -T CombineGVCFs -R $ref_capture -V ".join(" -V ", @gvcf_files)." -o $combined_gvcf";
# print STDERR "\t\t$combineGVCF_cmd\n";
# system($combineGVCF_cmd)  == 0 or die "system $combineGVCF_cmd failed: $?";

# print STDERR "SNP calling on combined VCF\n";
# Report the time elapsed since $start_run and print the closing banner.
my $end_run=Benchmark->new;
my $total_time=timestr(timediff($end_run,$start_run));
print STDERR "\nJob took ", $total_time, "\n",
	"\n####################################################################################################\n";
printf STDERR "%40s%20s\n","ANALYSIS","COMPLETED!";
print STDERR "####################################################################################################\n\n";

################################## functions
# sub bwa_mapping{
# 	my ($sample_id, $file_mate1, $file_mate2,$ref_full, $work_dir, $nthreads,$mem)=@_;
# 	my $bam_file=$work_dir.'/'.$sample_id.'_srt_filt.bam';
# 	return ($bam_file) if(-e $bam_file);
# 	my $bwa_cmd="bwa mem -P -t $nthreads -R \"\@RG\\tID:$sample_id\\tSM:$sample_id\"  $ref_full $file_mate1 $file_mate2";
# 	my $sort_bam_cmd="samtools view -u - | samtools sort -T tmp -m $mem - > $bam_file";
# 	open(IN, "$bwa_cmd |") or die "Failed to create a pipe connecting bwa: $!";
# 	open(OUT, "| $sort_bam_cmd") or die "Fail to create a pipe connectiong samtools: $!";
# 	print STDERR "\t\t$bwa_cmd\n\t\t$sort_bam_cmd\n";
# 	while(<IN>){
# 		if(/^@/){
# 			print OUT $_;
# 		}else{
# 			my ($flag, $mate_chr,$insert_size)=(split/\s+/)[1, 6, 8];# mapq will be applied by GATK
# 			next if($flag & 256 || $flag & 4 || $mate_chr ne '=' || $insert_size > $max_insert_size || $insert_size < - $max_insert_size);
# 			print OUT $_;
# 		}
# 	}
# 	close(IN);
# 	close(OUT);
# 	return ($bam_file);
# }
# Run GATK HaplotypeCaller in GVCF mode on one interval file.
#   $interval          interval file named <name>-<N>.intervals; <N> becomes part of the output name
#   $ref_capture       reference fasta passed to -R
#   $sample_id, $work_dir, $ploid
#                      used to build the output name <work_dir>/<sample_id>-HC-<N>-p<ploid>.g.vcf
#   $gatk_uppmax       GATK jar
#   $tmp_dir           value for java.io.tmpdir
#   $realign_bam_file  input BAM
#   $nct               value for -nct
#   $mem               java heap size for -Xmx
# Dies when the interval name carries no part index or when the command fails.
sub haplotypeCaller{
	my ($interval, $ref_capture, $sample_id, $work_dir, $gatk_uppmax, $tmp_dir, $realign_bam_file, $nct, $ploid, $mem)=@_;
	my ($index)=$interval=~/\-(\d+)\.intervals$/;
	# Without the index the output name would contain an undefined value and
	# would not match the part files the caller concatenates afterwards.
	die "Cannot derive a part index from interval file name '$interval' (expected <name>-<N>.intervals)\n" unless(defined $index);
	my $hap_caller_file=$work_dir.'/'.$sample_id.'-HC-'.$index.'-p'.$ploid.'.g.vcf';
	my $hap_caller_cmd="java -Djava.io.tmpdir=$tmp_dir -Xmx".$mem." -jar $gatk_uppmax -nct $nct -ploidy $ploid --emitRefConfidence GVCF -I $realign_bam_file  -R $ref_capture -T HaplotypeCaller -o $hap_caller_file -gt_mode DISCOVERY -L $interval";
	print STDERR "\t\t$hap_caller_cmd\n";
	system($hap_caller_cmd)== 0 or die "system $hap_caller_cmd failed: $?";
}

# Split the sequence ids of a fasta file into interval files.
#   $seq_file   fasta file; the output files are written next to it
#   $num_files  number of interval files to write (truncated to an integer)
# Writes <fasta without .fa>-<N>.intervals for N = 1..$num_files, one sequence id
# per line. Ids keep their fasta order and are cut into contiguous chunks; when the
# ids do not divide evenly the first files receive one extra id each.
# Dies when $num_files is below 1 or a file cannot be written.
sub create_interval_by_fasta{
	my ($seq_file, $num_files)=@_;
	$num_files=defined $num_files ? int($num_files) : 0;
	die "The number of interval files must be at least 1\n" if($num_files < 1);
	my $in=Bio::SeqIO->new(-file => $seq_file, -format => 'fasta');
	my @id=();
	while(my $seqObj=$in->next_seq){
		push @id, $seqObj->id;
	}
	print STDERR scalar @id, " sequences have been read from $seq_file\n";
	warn "Only ".scalar(@id)." sequences for $num_files interval files, some interval files will be empty\n" if(@id < $num_files);
	my ($file_name, $path)=fileparse($seq_file, qw{\.fa});
	my $basename=$path.$file_name;
	my $seq_per_file=int(scalar @id / $num_files);
	my $num_larger_files=scalar @id % $num_files;
	foreach my $index (1..$num_files){
		my $output=$basename.'-'.$index.'.intervals';
		my $num_to_write=$seq_per_file + ($index <= $num_larger_files ? 1 : 0);
		my @interval_to_write=splice(@id, 0, $num_to_write);
		open(my $inter, '>', $output) or die "Cannot write to $output: $!";
		print $inter map { "$_\n" } @interval_to_write;
		close($inter) or die "Cannot write to $output: $!";
		print STDERR scalar @interval_to_write, " intervals have been written into $output\n";
	}
}
# Prepare the companion files of the capture reference and return the BED file name.
#   $ref  fasta file of the capture reference
# Creates, only when missing:
#   <ref>.fai                   with "samtools faidx"
#   <ref without suffix>.dict   with Picard CreateSequenceDictionary
#                               (uses the file-level $mem and $picard_uppmax settings)
#   <ref without suffix>.bed    one line per sequence covering its full length
# Dies when a command fails or the BED file cannot be written.
sub index_capture_genome{
	my $ref=shift;
	unless(-e $ref.'.fai'){
		my $ref_index_cmd="samtools faidx $ref ";
		print STDERR "Cannot find the fai file for $ref, start to index the reference\n\t$ref_index_cmd\n";
		system($ref_index_cmd)==0 or die "system $ref_index_cmd failed: $?";
	}
	my $genome_dict_file=$ref;
	$genome_dict_file=~s/\.\w+$//;
	my $bed_file=$genome_dict_file.'.bed';
	$genome_dict_file.='.dict';
	unless(-e $genome_dict_file){
		# Picard options are written KEY=value without a space after '='.
		my $genome_dict_cmd="java -Xmx".$mem." -jar $picard_uppmax CreateSequenceDictionary R=$ref O=$genome_dict_file";
		print STDERR "Cannot locate dict file for $ref, start to createSequenceDict file\n\t$genome_dict_cmd\n";
		system($genome_dict_cmd) == 0 or die "system $genome_dict_cmd failed: $?";
	}
	unless(-e $bed_file){
		my $in=Bio::SeqIO->new(-file => $ref, -format=>'fasta');
		open(my $bed, '>', $bed_file) or die "Cannot open $bed_file: $!";
		while(my $seqObj=$in->next_seq){
			my $id=$seqObj->id;
			my $length=$seqObj->length;
			# BED start positions are 0-based and end positions exclusive, so a
			# whole sequence is 0..length; a start of 1 would leave out the first base.
			print $bed "$id\t0\t$length\n";
		}
		close($bed) or die "Cannot write to $bed_file: $!";
	}
	return($bed_file);
}

# Make sure the bwa index of the mapping reference exists.
#   $ref  fasta file given to "bwa mem"
# Runs "bwa index" when any of the .pac/.amb/.ann/.bwt/.sa files is missing and
# dies when that command fails.
sub index_mapping_genome{
	my ($ref)=@_;
	my @missing_index=grep { !-e $ref.$_ } qw{.pac .amb .ann .bwt .sa};
	if(@missing_index){
		my $genome_bwa_index_cmd="bwa index $ref";
		print STDERR "Cannot locate bwa index file for $ref, start to do genome indexing\n\t$genome_bwa_index_cmd\n";
		die "system $genome_bwa_index_cmd failed: $?" unless(system($genome_bwa_index_cmd) == 0);
	}
}

# Run an external command and die when it does not exit with status 0.
#   $cmd      command; when it is the only argument it is handed to system() as one string
#   @options  further arguments, passed to system() as a list together with $cmd
sub run_cmd{
	my @args=@_;
	my $status=system(@args);
	die "system @args failed: $?" unless($status == 0);
}

# Run $function once per element of @$array, each call in its own forked child,
# and wait until all children have finished.
#   $array     array reference with one job argument per child
#   $function  code reference called as $function->($job, @options) in the child
#   @options   extra arguments handed to every call
# Dies when a child cannot be forked or when any child ends with a non-zero status.
sub parallele_machine{
	my ($array, $function, @options)=@_;
	my @child;
	foreach my $child_process (@$array){
		my $pid=fork();
		unless(defined $pid){
			# fork failed: this is still the parent. Collect the children that
			# were already started before reporting the error.
			my $fork_error=$!;
			waitpid($_, 0) foreach @child;
			die "Cannot fork: $fork_error";
		}
		if($pid==0){ # child process
			# Trap errors here so that a dying job can never unwind into an
			# eval of the caller and continue running the parent's code.
			my $job_ok=eval { $function->($child_process, @options); 1 };
			print STDERR $@ unless($job_ok);
			exit($job_ok ? 0 : 1);
		}
		push @child, $pid; # parent process
	}
	my @failed=();
	foreach my $pid (@child){
		my $reaped=waitpid($pid,0);
		# $? holds the wait status of the child that was just collected
		push @failed, "$pid (status $?)" if($reaped == $pid && $? != 0);
	}
	die "Child process failed: ".join(', ', @failed)."\n" if(@failed);
}

# Run the jobs in @$array in batches of at most $num_threads parallel processes.
#   $array         array reference with one job argument per process
#   $num_threads   maximum number of processes per batch (values below 1 are treated as 1)
#   $function_ref  code reference run for every job by parallele_machine
#   @options       extra arguments handed to every call
# Every job is run exactly once; a batch is started only after the previous one
# has finished. The jobs are taken from a copy, the caller's array is not modified.
sub parallele_machine_by_splice{
	my ($array, $num_threads, $function_ref, @options)=@_;
	my @pending=@$array;
	my $batch_size=defined $num_threads ? int($num_threads) : 1;
	$batch_size=1 if($batch_size < 1);
	while(my @child_process=splice(@pending, 0, $batch_size)){
		parallele_machine(\@child_process, $function_ref, @options);
	}
}
# Collect all plain files below a directory that carry a given extension.
#   $parent_path  directory to search recursively
#   $filter       extension without the leading dot (e.g. fastq); it is matched
#                 literally, case-insensitively, at the end of the file name and
#                 may be followed by .gz
# Returns the sorted list of absolute file names.
# Dies when $parent_path is not an existing directory.
sub get_filenames_recursive{
	my ($parent_path, $filter)=@_;
	my $root=abs_path($parent_path);
	die "Cannot find the directory $parent_path\n" unless(defined $root && -d $root);
	# Anchoring the extension keeps files such as reads.fastq.md5 out of the list.
	my $wanted_name=qr/\.\Q$filter\E(?:\.gz)?\z/i;
	my @filenames=();
	find(sub{
		push @filenames, $File::Find::name if(-f $File::Find::name && $_ =~ $wanted_name);
	}, $root);
	@filenames=sort @filenames;
	return(@filenames);
}

__END__

###################################### usage
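=head1 NAME

exomeCapture-pipeline.pl - map exome capture reads with bwa and call variants per sample with GATK HaplotypeCaller
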
=head1 SYNOPSIS

 perl exomeCapture-pipeline.pl -in <input dir> 
 							   -out <output dir> 
							   -ref <ref genome> 
							   -expref <capture genome>
							   -size <max insert size>
							   -format <fastq|fasta|txt>
							   -proj [project id] 
							   -post <g.vcf file name>
							   -threads <number of threads: 12>
							   -nct <number of cores per data thread for HaplotypeCaller: 3> 
 							   -memory <java memory: 64g> 
							   -picard <picard dir>
							   -gatk <gatk dir>
							   -tmp <TEMP dir>
							   -filter <additional gatk filtering for haplotypeCaller: default>
							   -d <ploid: 2>
							   -help            
		   
=head1 OPTIONS

=over 4

=item B<--in>

where you keep sequencing data

=item B<--format>

what kind of file extension used for the sequence file [fasta, fastq, or txt], regardless of gz 

=item B<--out>

output directory you wish to write results into

=item B<--ref>

genome reference file in fasta format for bwa mapping

=item B<--expref>

genome reference file in fasta format for capture sequencing positions, used to call snp

=item B<--post>

postfix used to name combined .g.vcf file

=item B<--size>

max insert size between mates defaults: 1000 for exome capture the design is around 400bp

=item B<--proj>

project id used to include in the sequence file name, leave it blank if none

=item B<--threads>

number of threads needed max 16 for one uppmax node

=item B<--nct>

number of cpu cores per data thread used in HaplotypeCaller, default 3. Increasing it to a higher value seldom speeds up the computation

=item B<--memory> 

RAM needed for samtools, picard, and GATK. default 64g; for uppmax more memory needed -C 

=item B<--picard | --gatk>

where to find the picard and gatk jar 

=item B<--tmp>

TMP directory for picard and Java GATK

=item B<--filter> 

we use default setting for HaplotypeCaller 
	-HCMappingQualityFilter
	
	-MalformedReadFilter
	
	-BadCigarFilter
	
	-UnmappedReadFilter
	
	-NotPrimaryAlignmentFilter
	
	-FailsVendorQualityCheckFilter
	
	-DuplicateReadFilter
	
	-MappingQualityUnavailableFilter
	
add more options to this string (note: the current script parses --filter but does not pass it on to HaplotypeCaller)

=item B<--d> 

number of ploidy default:2

=item B<--help> 

to show this message

=back


=head1 AUTHORS

Jun Chen modified from the pipeline of John Baison SLU Umea

=cut
