#!/usr/bin/perl -w

use warnings;
use strict;

# Sort catalog loci into three FASTA files according to how many samples
# contribute more than one component (stack) to the locus.
#
# Inputs (read from the current directory):
#   list.txt                 - one file name per line: the .snps.tsv files of
#                              all samples
#   batch_1.catalog.tags.tsv - one of the output files of the cstacks step of
#                              the pipeline
#
# Outputs (overwritten on every run):
#   reference_S.fasta    - loci where no sample has more than one component
#                          (single-copy loci)
#   reference_1_RE.fasta - loci where exactly one sample has more than one
#                          component (repetitive in 1 individual)
#   reference_D_RE.fasta - loci where exactly two samples have more than one
#                          component (repetitive in >1 individual; loci with
#                          more than two such samples are discarded)

my $list_file    = "list.txt";
my $catalog_file = "batch_1.catalog.tags.tsv";
my $single_file  = "reference_S.fasta";
my $one_re_file  = "reference_1_RE.fasta";
my $multi_re_file = "reference_D_RE.fasta";

# Open everything up front so a missing input or an unwritable output is
# reported before any work is done.  Three-argument open keeps characters
# such as '<', '>' or '|' in a file name from being treated as a mode.
open my $list_fh,     '<', $list_file     or die "Cannot open '$list_file' for reading: $!";
open my $catalog_fh,  '<', $catalog_file  or die "Cannot open '$catalog_file' for reading: $!";
open my $single_fh,   '>', $single_file   or die "Cannot open '$single_file' for writing: $!";
open my $one_re_fh,   '>', $one_re_file   or die "Cannot open '$one_re_file' for writing: $!";
open my $multi_re_fh, '>', $multi_re_file or die "Cannot open '$multi_re_file' for writing: $!";

# %DP is keyed by "<column 2>_<column 3>" of every line in the listed files;
# the value is the number of lines seen with that key.
my %DP;

while (my $sample_file = <$list_fh>) {
    # Two-argument open used to strip surrounding whitespace (including the
    # line ending) from the name; do the same explicitly.
    $sample_file =~ s/\A\s+|\s+\z//g;
    next if $sample_file eq '';    # tolerate blank lines in the list

    open my $sample_fh, '<', $sample_file
        or die "Cannot open '$sample_file' (listed in '$list_file'): $!";

    while (my $record = <$sample_fh>) {
        chomp $record;
        my @fields = split /\t/, $record;
        next if @fields < 3;       # header/comment or truncated line
        $DP{ $fields[1] . "_" . $fields[2] }++;
    }
    close $sample_fh;
}
close $list_fh;

my $ind_number_th = 35;  # the value is equal to the half number of all analyzed samples;

LOCUS:
while (my $record = <$catalog_fh>) {
    chomp $record;

    # Splitting on runs of tabs collapses empty columns, so the indices below
    # are positions among the NON-empty fields of the line.
    # NOTE(review): this relies on the same columns being empty on every
    # catalog line -- confirm against the cstacks output being used.
    my @fields = split /\t+/, $record;
    next LOCUS if @fields < 9;     # header/comment or truncated line

    my ($locus_id, $component_list, $sequence) = @fields[2, 7, 8];

    my %components_per_sample;     # text before the first '_' => component count
    my $rep_count = 0;             # components whose ID is a key of %DP
    for my $component (split /,/, $component_list) {
        $rep_count++ if exists $DP{$component};
        my ($sample_id) = split /_/, $component;
        $components_per_sample{$sample_id}++;
    }

    # Drop loci with more than two components present in %DP, and loci seen
    # in fewer distinct samples than the threshold.
    next LOCUS
        if $rep_count > 2
        || scalar(keys %components_per_sample) < $ind_number_th;

    # Number of samples that contribute more than one component to the locus.
    my $count_RE = grep { $_ > 1 } values %components_per_sample;
    next LOCUS if $count_RE > 2;

    my $out_fh;
    if ($count_RE > 1) {
        $out_fh = $multi_re_fh;
    }
    elsif ($count_RE > 0) {
        $out_fh = $one_re_fh;
    }
    else {
        $out_fh = $single_fh;
    }

    print {$out_fh} ">" . $locus_id, "\n", $sequence, "\n"
        or die "Write to output FASTA failed: $!";
}
close $catalog_fh;

# Buffered write errors (e.g. a full disk) only surface at close time.
close $single_fh   or die "Error closing '$single_file': $!";
close $one_re_fh   or die "Error closing '$one_re_file': $!";
close $multi_re_fh or die "Error closing '$multi_re_file': $!";