#!/usr/bin/perl -w

## Written by Sarah B. Carey

## This script takes the results of PAML codeml for each gene run in paml_bash.sh
## and gets dN, dS, and dN/dS for GG1 and R40 for trees with no, 1, or 2 labels.
## It also computes the AIC for each of these models and reports the best-supported one.



## Command-line arguments: three file-name suffixes, one per codeml model
## (1-rate, 2-rate, 3-rate).  Each suffix is expanded with a leading "*",
## so every file in the current directory ending in that suffix is used.
##
## All three suffixes are required: with a missing argument the pattern
## would collapse to "*" and every file in the directory would be parsed
## as codeml output.
die "Usage: $0 <1rate_suffix> <2rate_suffix> <3rate_suffix>\n"
	unless @ARGV >= 3;

$file_1rate = $ARGV[0];
@file_array_1rate = glob "*$file_1rate";
$file_2rate = $ARGV[1];
@file_array_2rate = glob "*$file_2rate";
$file_3rate = $ARGV[2];
@file_array_3rate = glob "*$file_3rate";

## Defaults reported when the corresponding line is never found in a file.
$lnL_1rate = 0;
$lnL_2rate = 0;
$lnL_3rate = 0;
$omega_1rate = 0;
$omega_2rate_0 = 0;
$omega_2rate_1= 0;
$omega_3rate_0= 0;
$omega_3rate_1= 0;
$omega_3rate_2= 0;

## Placeholder AIC values, overwritten once an lnL line is parsed.
## NOTE(review): a model whose lnL is never parsed keeps this small
## placeholder and would therefore rank as "best" in the AIC comparison
## at the end of the script -- confirm this is intended.
$AIC_1rate = 0;
$AIC_2rate = 1;
$AIC_3rate = 2;


## Parse the 1-rate codeml output file(s).
## Sets: $file_name   - file path with the ".1rate.paml.out" suffix removed
##       $lnL_1rate   - log-likelihood taken from the "lnL" line
##       $AIC_1rate   - -2*lnL + 2*1
##       $omega_1rate - value after "=" on the "omega" line
## If several files match, values from the last one processed are kept.
for $file_1rate (@file_array_1rate)
{
	# Dots are escaped so that only the literal suffix matches.
	if ($file_1rate =~ m/([\s\S]+)\.1rate\.paml\.out/)
	{
		$file_name = $1;
	}

	# 3-arg open so the file name is never interpreted as a mode, and a
	# failure is reported instead of silently reading nothing.
	open(IN, '<', $file_1rate) or die "Cannot open $file_1rate: $!\n";
	while (<IN>)
	{
		$line_1rate = $_;
		# "lnL" is spelled literally: inside a pattern "\l" is the
		# lowercase-next-character escape, so "\lnL" only matched "nL".
		# The lazy quantifier takes the FIRST "-..." token after "lnL";
		# a greedy one returns the last such token on the line, which is
		# wrong if a second negative number follows the log-likelihood.
		if ($line_1rate =~ m/lnL[\s\S]+?(-\S+)/)
		{
			$lnL_1rate = $1;
			$AIC_1rate = (-2 * $lnL_1rate) + (2 * 1);
		}
		elsif ($line_1rate =~ m/omega[\s\S]+=\s+(\S+)/)
		{
			$omega_1rate = $1;
		}
	}
}

## Parse the 2-rate codeml output file(s).
## Sets: $lnL_2rate     - log-likelihood taken from the "lnL" line
##       $AIC_2rate     - -2*lnL + 2*2
##       $omega_2rate_0 - first value after "branches:" on the "w ..." line
##       $omega_2rate_1 - second value on that line
## If several files match, values from the last one processed are kept.
for $file_2rate (@file_array_2rate)
{
	# 3-arg open with an explicit failure message.
	open(IN2, '<', $file_2rate) or die "Cannot open $file_2rate: $!\n";
	while (<IN2>)
	{
		$line_m2 = $_;
		# "lnL" is spelled literally ("\l" in a pattern is the
		# lowercase-next-character escape, so "\lnL" only matched "nL").
		# Lazy match: take the first "-..." token after "lnL" rather than
		# the last one on the line.
		if ($line_m2 =~ m/lnL[\s\S]+?(-\S+)/)
		{
			$lnL_2rate = $1;
			$AIC_2rate = (-2 * $lnL_2rate) + (2 * 2);
		}
		elsif ($line_m2 =~ m/w[\s\S]+branches:\s+(\S+)\s(\S+)/)
		{
			$omega_2rate_0 = $1;
			$omega_2rate_1 = $2;
		}
	}
}


## Parse the 3-rate codeml output file(s).
## Sets: $lnL_3rate         - log-likelihood taken from the "lnL" line
##       $AIC_3rate         - -2*lnL + 2*3
##       $omega_3rate_0..2  - the three values after "branches:" on the "w ..." line
##       $gg1_ds, $r40_ds   - numbers following "Ceratodon_GG1: " / "Ceratodon_R40: "
##                            on the line after "dS tree:"
##       $gg1_dn, $r40_dn   - the same, from the line after "dN tree:"
## If several files match, values from the last one processed are kept.
for $file_3rate (@file_array_3rate)
{
	# 3-arg open with an explicit failure message.
	open(IN3, '<', $file_3rate) or die "Cannot open $file_3rate: $!\n";
	while (<IN3>)
	{
		$line_m3 = $_;
		# "lnL" is spelled literally ("\l" in a pattern is the
		# lowercase-next-character escape, so "\lnL" only matched "nL").
		# Lazy match: take the first "-..." token after "lnL" rather than
		# the last one on the line.
		if ($line_m3 =~ m/lnL[\s\S]+?(-\S+)/)
		{
			$lnL_3rate = $1;
			$AIC_3rate = (-2 * $lnL_3rate) + (2 * 3);
		}
		elsif ($line_m3 =~ m/w[\s\S]+branches:\s+(\S+)\s(\S+)\s(\S+)/)
		{
			$omega_3rate_0 = $1;
			$omega_3rate_1 = $2;
			$omega_3rate_2 = $3;
		}
		elsif ($line_m3 =~ m/dS\stree:/)
		{
			# The tree is read from the line following the header.
			$ds_tree = <IN3>;
			if (defined $ds_tree)
			{
				# \d+\.\d+ : literal decimal point and any number of
				# integer digits, so values >= 10 are captured too.
				if ($ds_tree =~ m/Ceratodon_GG1:\s(\d+\.\d+)/)
				{$gg1_ds = $1;}
				if ($ds_tree =~ m/Ceratodon_R40:\s(\d+\.\d+)/)
				{$r40_ds = $1;}
			}
		}
		elsif ($line_m3 =~ m/dN\stree:/)
		{
			# Same layout as the dS tree: values are on the next line.
			$dn_tree = <IN3>;
			if (defined $dn_tree)
			{
				if ($dn_tree =~ m/Ceratodon_GG1:\s(\d+\.\d+)/)
				{$gg1_dn = $1;}
				if ($dn_tree =~ m/Ceratodon_R40:\s(\d+\.\d+)/)
				{$r40_dn = $1;}
			}
		}
	}
}

## Choose the model with the lowest AIC and print one tab-separated row:
##   file_name, omega_1rate, AIC_1rate,
##   omega_2rate_0, omega_2rate_1, AIC_2rate,
##   omega_3rate_0, omega_3rate_1, omega_3rate_2, AIC_3rate,
##   best model label, best model AIC, (empty column),
##   r40_ds, gg1_ds, r40_dn, gg1_dn
my %AIC_hash = ("1rate" => $AIC_1rate, "2rate" => $AIC_2rate, "3rate" => $AIC_3rate);

# Sort model labels by ascending AIC.  Ties are broken by label so the
# result does not depend on hash key order (which varies between runs);
# on a tie the model with fewer rates is reported.
my @values = sort { $AIC_hash{$a} <=> $AIC_hash{$b} or $a cmp $b } keys %AIC_hash;
my $min = $values[0];

# NOTE: despite their names, $min_value holds the LABEL of the best model
# ("1rate"/"2rate"/"3rate") and $best_model holds that model's AIC value.
# The names are kept unchanged; the output column order is label, then AIC.
$min_value = "$min";
$best_model = "$AIC_hash{$min}";

print join("\t",
	$file_name,
	$omega_1rate, $AIC_1rate,
	$omega_2rate_0, $omega_2rate_1, $AIC_2rate,
	$omega_3rate_0, $omega_3rate_1, $omega_3rate_2, $AIC_3rate,
	$min_value, $best_model,
	'',		# empty column kept for output compatibility
	$r40_ds, $gg1_ds, $r40_dn, $gg1_dn), "\n";


close IN;
close IN2;
close IN3;


