#!/usr/bin/perl -w

## Written by Sarah B. Carey

## This script takes in pruned trees (from prune_tree.py), renames each tip
## to just the species name (i.e., strips the gene ID information),
## and removes the branch lengths in preparation for running PAML.
## It also takes in the FASTA files (ending in .out.fa) that hold
## the same sequences as the gene tree and renames those for PAML.

#usage = "perl <script_name>";

use strict;
use warnings;

## For every cluster<N>_pruned.tre in the current directory, read it together
## with the matching cluster<N>.out.fa and write two PAML-ready files:
##   cluster<N>.paml.fa   - FASTA with headers reduced to ">Genus_species"
##   cluster<N>.paml.tre  - tree with branch lengths removed and tip labels
##                          reduced to "Genus_species"
## Dies if an input cannot be read or an output cannot be written.

for my $tree_file (glob 'cluster*_pruned.tre') {

    # The glob is looser than the naming scheme; only accept
    # cluster<digits>_pruned.tre and skip anything else it picked up.
    next unless $tree_file =~ m/\Acluster(\d+)_pruned\.tre\z/;
    my $filenum = $1;

    my $fasta_file = "cluster${filenum}.out.fa";
    my $fasta_out  = "cluster${filenum}.paml.fa";
    my $tree_out   = "cluster${filenum}.paml.tre";

    # Open both inputs before creating any output, so a missing input
    # does not leave empty output files behind.
    open my $fasta_in, '<', $fasta_file
        or die "Fasta file not working: $fasta_file: $!\n";
    open my $tree_in, '<', $tree_file
        or die "Tree not working: $tree_file: $!\n";

    open my $fasta_fh, '>', $fasta_out
        or die "Cannot write $fasta_out: $!\n";
    open my $tree_fh, '>', $tree_out
        or die "Cannot write $tree_out: $!\n";

    # Handle each FASTA line on its own: headers are renamed, every other
    # line is copied through. This keeps wrapped (multi-line) sequences
    # intact instead of re-printing the previous record.
    while (my $line = <$fasta_in>) {
        print {$fasta_fh} _rename_fasta_line($line);
    }

    while (my $line = <$tree_in>) {
        print {$tree_fh} _clean_tree_line($line);
    }

    close $fasta_in;
    close $tree_in;

    # Buffered write errors only surface at close, so check them.
    close $fasta_fh or die "Error closing $fasta_out: $!\n";
    close $tree_fh  or die "Error closing $tree_out: $!\n";
}

## Return the PAML version of one FASTA line.
## A line containing ">Genus_species<gene ID...>" becomes ">Genus_species\n";
## any other line (sequence data, blank line) is returned unchanged.
## A ">" header without a Genus_species prefix is returned unchanged with a
## warning, since it cannot be renamed.
sub _rename_fasta_line {
    my ($line) = @_;

    # \w* (not \w+): a header that is already just ">Genus_species" must not
    # lose its last character to the gene-ID part of the pattern.
    if ($line =~ m/(>[a-z]+_[a-z0-9]+)\w*/i) {
        return "$1\n";
    }

    warn "Header not renamed (no Genus_species prefix): $line"
        if $line =~ m/\A>/;

    return $line;
}

## Return one tree line with branch lengths removed, remaining dots turned
## into underscores, and every "Genus_species<gene ID...>" label cut down to
## "Genus_species".
sub _clean_tree_line {
    my ($line) = @_;

    # One pattern for every numeric branch length: integer, decimal, and
    # scientific notation with either exponent sign (":3", ":0.12",
    # ":1e-05", ":1.5E+02"). The decimal point is matched literally so that
    # text such as ":1)90" is not swallowed together with the parenthesis.
    $line =~ s/:[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:e[-+]?\d+)?//gi;

    # Dots left in labels become underscores so the gene ID joins the
    # \w* tail that the next substitution strips.
    $line =~ tr/./_/;

    # Keep only the Genus_species part of each label.
    $line =~ s/([a-z]+_[a-z0-9]+)\w*/$1/gi;

    return $line;
}


