#!/usr/bin/perl -w use warnings; use strict; use autodie; open GATK, "3pines_GATK_M1_common.vcf" or die; open STACKS, "3pines_stacks_M1_common.vcf" or die; open OUT, ">3pines_unified_snp_all_M1.vcf" or die; my %SNPs_GATK; my %SNPs_stacks; my %SNPs_stacks1; my %SNPs_stacks2; my %SNPs_GATK1; my %SNPs_GATK2; my $count_DP=0; while (){ my ($genotype_1,$coverage_1,$coverage_1a); chomp; my @temp_1=split(/\t/,$_); my $SNP_id_1=$temp_1[0]."_".$temp_1[1]; my @GT_alt = split /,/,$temp_1[4]; my $i=0; my (@GT_1, @DP_1, @DP_1a); my ($GT_1, $DP_1, $DP_1a); for ($i=0;$i<=($#temp_1-9);$i++){ if($temp_1[9+$i] eq "."){ $GT_1 = "."; $DP_1 = 0; $DP_1a = 0; } elsif($temp_1[9+$i] =~ /^0:/){ $GT_1 = $temp_1[3]; my @line_1=split /:/,$temp_1[9+$i]; my @line_1a=split /,/,$line_1[1]; $DP_1 = $line_1[2]; $DP_1a=$line_1a[0]; }elsif($temp_1[9+$i] =~ /^1:/){ $GT_1=$GT_alt[0]; my @line_1=split /:/,$temp_1[9+$i]; my @line_1a=split /,/,$line_1[1]; $DP_1 = $line_1[2]; $DP_1a=$line_1a[1]; }else{ $GT_1=$GT_alt[1]; my @line_1=split /:/,$temp_1[9+$i]; my @line_1a=split /,/,$line_1[1]; $DP_1 = $line_1[2]; $DP_1a=$line_1a[1]; } push @GT_1,$GT_1; push @DP_1,$DP_1; push @DP_1a,$DP_1a; } $genotype_1=join ("_",@GT_1); $coverage_1=join ("_",@DP_1); $coverage_1a=join ("_",@DP_1a); $SNPs_GATK{$SNP_id_1}=$genotype_1; $SNPs_GATK1{$SNP_id_1}=$coverage_1; $SNPs_GATK2{$SNP_id_1}=$coverage_1a; } close GATK; while (){ chomp; my @temp_2=split(/\t/,$_); my $SNP_id_2=$temp_2[0]."_".$temp_2[1]; my (@GT_2,@DP_2); my ($GT_2,$DP_2); my $h=0; for ($h=0;$h<=($#temp_2-9);$h++){ my @line_2=split /:/,$temp_2[9+$h]; $DP_2=$line_2[1]; if ($temp_2[9+$h] =~ /^0\/0/){ $GT_2=$temp_2[3]; }elsif($temp_2[9+$h] =~ /^1\/1/){ $GT_2=$temp_2[4]; }else{ $GT_2="."; } push @GT_2, $GT_2; push @DP_2, $DP_2; } my $genotype_2=join ("_",@GT_2); my $coverage_2=join ("_",@DP_2); $SNPs_stacks{$SNP_id_2}=$genotype_2; $SNPs_stacks1{$SNP_id_2}=$coverage_2; $SNPs_stacks2{$SNP_id_2}=$_; #print $genotype_2,"\n"; } close STACKS; foreach (sort keys %SNPs_GATK1){ my $j=0; my $count=0; my $count_1=0; my $i=0; if($SNPs_GATK{$_} eq $SNPs_stacks{$_} ){ print OUT $SNPs_stacks2{$_},"\n"; }else{ my @DP1=split /_/,$SNPs_GATK1{$_}; my @DP1a=split /_/,$SNPs_GATK2{$_}; my @DP2=split /_/,$SNPs_stacks1{$_}; foreach (@DP1){ if (($_ - $DP1a[$j]) > 2){ $count++; } $j++; } if($count!=0){ print OUT $SNPs_stacks2{$_},"\n"; $count_DP++; }else{ my @info=split /\t/,$SNPs_stacks2{$_}; my @GT1=split /_/,$SNPs_GATK{$_}; my @GT2=split /_/,$SNPs_stacks{$_}; my @DP1a=split /_/,$SNPs_GATK2{$_}; my @DP2=split /_/,$SNPs_stacks1{$_}; my @GT; my $vcf; foreach (@GT2){ if (($_ eq $GT1[$i])||($GT1[$i] eq ".")){ $vcf = $info[9+$i]; push @GT, $vcf; }elsif ($_ ne "."){ $vcf = $info[9+$i]; push @GT, $vcf; }elsif($GT1[$i] eq $info[3]){ $vcf = "0\/0:$DP1a[$i]:.,.,."; push @GT, $vcf; }elsif($GT1[$i] eq $info[4]){ $vcf = "1\/1:$DP1a[$i]:.,.,."; push @GT, $vcf; }else{ $vcf = "2\/2:$DP1a[$i]:.,.,."; $info[4]=$info[4].",".$GT1[$i]; push @GT, $vcf; } $i++; } my $genotype=join ("\t",@GT); my $s1 = ($genotype =~ s/0\/0/0\/0/); my $s2 = ($genotype =~ s/1\/1/1\/1/); my $s3 = ($genotype =~ s/2\/2/2\/2/); my $AF1 = ($s1/($s1+$s2+$s3)); my $AF2 = ($s2/($s1+$s2+$s3)); my $AF3 = ($s3/($s1+$s2+$s3)); for($i=0;$i<7;$i++){ print OUT $info[$i]."\t"; } if ($s3==0){ print OUT "NS=".($s1+$s2).";"."AF="; printf OUT ("%.3f",$AF1); print OUT ":"; printf OUT ("%.3f",$AF2); print OUT ";"."\t"; }else{ print OUT "NS=".($s1+$s2+$s3).";"."AF="; printf OUT ("%.3f",$AF1); print OUT ":"; printf OUT ("%.3f",$AF2); print OUT ":"; printf OUT ("%.3f",$AF3); print OUT ";"."\t"; } print OUT $info[8]."\t"; print OUT $genotype,"\n"; } } } # print $count_DP."\n";