#!/usr/bin/perl -w
use warnings;
use strict;

# Sorts the loci of a cstacks catalog into three FASTA files according to how
# many individuals contribute more than one stack to the locus.
#
# Inputs (read from the current working directory):
#   list.txt                  - one path per line; the files (.snps.tsv) of all
#                               samples.
#   batch_1.catalog.tags.tsv  - one of the output files of the pipeline step
#                               cstacks.
#
# Outputs (created/truncated in the current working directory):
#   reference_S.fasta     - loci where no individual is repetitive
#                           (single-copy loci).
#   reference_1_RE.fasta  - loci where exactly 1 individual is repetitive.
#   reference_D_RE.fasta  - loci where exactly 2 individuals are repetitive
#                           (loci with more than 2 are discarded).
#
# An individual is "repetitive" for a locus when it appears more than once in
# the locus' comma-separated member list (field 8 of the catalog row).
#
# The script dies with the file name and OS error if any file cannot be
# opened, read-closed or written.

my $list_file     = 'list.txt';
my $catalog_file  = 'batch_1.catalog.tags.tsv';
my $single_file   = 'reference_S.fasta';
my $one_rep_file  = 'reference_1_RE.fasta';
my $two_rep_file  = 'reference_D_RE.fasta';

# Minimum number of distinct individuals a locus must contain to be kept.
# The value is equal to half the number of all analyzed samples.
my $ind_number_th = 35;

# All handles are opened up front, in the same order as before, so that a
# missing input is reported before any processing starts.
open my $list_fh, '<', $list_file
    or die "Cannot open '$list_file' for reading: $!";
open my $catalog_fh, '<', $catalog_file
    or die "Cannot open '$catalog_file' for reading: $!";
open my $single_fh, '>', $single_file
    or die "Cannot open '$single_file' for writing: $!";
open my $one_rep_fh, '>', $one_rep_file
    or die "Cannot open '$one_rep_file' for writing: $!";
open my $two_rep_fh, '>', $two_rep_file
    or die "Cannot open '$two_rep_file' for writing: $!";

# %DP counts, per "<field 2>_<field 3>" key, how many rows of the listed
# .snps.tsv files carry that key.
# NOTE(review): fields 2 and 3 are presumably sample ID and locus ID, which
# would match the "<sample>_<locus>" entries of the catalog - confirm against
# the Stacks file format in use.
my %DP;

while ( my $snps_file = <$list_fh> ) {
    chomp $snps_file;

    # The former 2-arg open() ignored surrounding whitespace in the name;
    # 3-arg open() does not, so trim explicitly to keep such lists working.
    $snps_file =~ s/\A\s+//;
    $snps_file =~ s/\s+\z//;
    next if $snps_file eq '';    # tolerate blank lines in the list

    open my $snps_fh, '<', $snps_file
        or die "Cannot open '$snps_file' (listed in '$list_file'): $!";

    while ( my $row = <$snps_fh> ) {
        chomp $row;
        my @fields = split /\t/, $row;

        # Comment/header or truncated rows have no usable key.
        next if @fields < 3;

        $DP{ $fields[1] . '_' . $fields[2] }++;
    }

    close $snps_fh
        or die "Error while reading '$snps_file': $!";
}
close $list_fh
    or die "Error while reading '$list_file': $!";

LOCUS:
while ( my $row = <$catalog_fh> ) {
    chomp $row;

    # Runs of tabs are treated as ONE separator, so empty columns collapse;
    # the indices below (2, 7, 8) are positions after that collapsing.
    my @fields = split /\t+/, $row;

    # Rows without a member list and a sequence (e.g. comment lines) cannot
    # yield a FASTA record.
    next LOCUS if @fields < 9;

    my ( $locus_id, $member_list, $sequence ) = @fields[ 2, 7, 8 ];

    my $snp_hits = 0;    # members that also occur as a key of %DP
    my %stacks_of;       # individual (text before the first "_") => #members

    for my $member ( split /,/, $member_list ) {
        $snp_hits++ if exists $DP{$member};

        my ($individual) = split /_/, $member;
        $stacks_of{$individual}++;
    }

    # Discard loci with more than 2 members present in the SNP files, and
    # loci that are found in too few individuals.
    next LOCUS if $snp_hits > 2;
    next LOCUS if scalar( keys %stacks_of ) < $ind_number_th;

    # Number of individuals contributing more than one stack to this locus.
    my $repetitive = grep { $_ > 1 } values %stacks_of;
    next LOCUS if $repetitive > 2;

    my $out_fh;
    my $out_name;
    if ( $repetitive > 1 ) {
        ( $out_fh, $out_name ) = ( $two_rep_fh, $two_rep_file );
    }
    elsif ( $repetitive > 0 ) {
        ( $out_fh, $out_name ) = ( $one_rep_fh, $one_rep_file );
    }
    else {
        ( $out_fh, $out_name ) = ( $single_fh, $single_file );
    }

    print {$out_fh} '>', $locus_id, "\n", $sequence, "\n"
        or die "Cannot write to '$out_name': $!";
}
close $catalog_fh
    or die "Error while reading '$catalog_file': $!";

# Buffered write errors (e.g. a full disk) only surface at close time, so the
# output handles are closed explicitly and checked.
close $single_fh
    or die "Cannot finish writing '$single_file': $!";
close $one_rep_fh
    or die "Cannot finish writing '$one_rep_file': $!";
close $two_rep_fh
    or die "Cannot finish writing '$two_rep_file': $!";