#!/usr/bin/perl

use strict;
use warnings;
use Cwd;

use POSIX;
use Math::BigFloat ':constant';
use Math::Round qw(:all);


my $dir = getcwd;


die "\nPlease provide \$Newick!\n\n" unless defined $ARGV[0];
my $Newick = $ARGV[0];

die "\nCannot find <$Newick> in <$dir>\n\n" unless -e $Newick;


####################################################################################################
####################################################################################################


my $C1 = 30;
my $C2 = 30;
my $C3 = 30;


my $N = 3000;



####################################################################################################


$Newick =~ /^(\S+?)\.m\.best\.tre/;
my $basename = $1;


my $tree = &Read_File($Newick);
$tree = &Drop_Tips($tree, "Nastanthus_spathulatus");


my ($NODE, $root) = &Get_Nodes($tree);
# print &Label_Tree($tree), "\n";


my $DIST = &Cophenetic($tree);
my $TIP = &Get_Tips($tree);


my $MPD = &MPD1($TIP, $DIST);


####################################################################################################


my @NODE = sort {$a <=> $b} ($C1, $C2, $C3);
my %TEMP;


foreach my $node (keys %{$$NODE{$root}}) {
	
	next if $node eq "NUM";
	next if $node eq "CLADE";
	
	next if $$NODE{$node}{"NUM"} < ($NODE[0] / 2);
	
	
#	print $$NODE{$node}{"NUM"}, "\n";
	$TEMP{$node} = $$NODE{$node}{"NUM"};
}


@NODE = sort { $TEMP{$b} <=> $TEMP{$a} } keys %TEMP;
unshift @NODE, $root;

my $NNODE = scalar @NODE;


print "\n>$basename\nMPD: $MPD\nNNODE: $NNODE\n";
die "\n\$NNODE <= 20!\n\n" if $NNODE <= 20;


####################################################################################################


my %ALL;


if (-e "$basename-${C1}_${C2}_$C3.txt") {
	
	open FH1, "<", "$basename-${C1}_${C2}_$C3.txt";
	while (<FH1>) {
		if (/^\S+?\s+?(\S+)/) {
			$ALL{$1} = 1;
		}
	}
	close FH1;
}


####################################################################################################


my (%COMM, $NR);
%TEMP = ();


while (1) {
	
	my $node1 = $NODE[rand $NNODE];
	my $node2 = $NODE[rand $NNODE];
	
	next if $node2 eq $node1;
	
	my $node3 = $NODE[rand $NNODE];	
	
	next if $node3 eq $node1;
	next if $node3 eq $node2;
	
	
	##################################
	
	
	my (@TAXA, $NTAX, $comm);
	
	
	if ( (exists $$NODE{$node1}{$node2}) && (exists $$NODE{$node1}{$node3}) ) {
		
		$comm = $node1;
		$NTAX = $$NODE{$node1}{"NUM"};
		
		@TAXA = split(/,/, $$NODE{$node1}{"CLADE"});
	}
	elsif ( (exists $$NODE{$node2}{$node1}) && (exists $$NODE{$node2}{$node3}) ) {
		
		$comm = $node2;
		$NTAX = $$NODE{$node2}{"NUM"};
		
		@TAXA = split(/,/, $$NODE{$node2}{"CLADE"});
	}
	elsif ( (exists $$NODE{$node3}{$node1}) && (exists $$NODE{$node3}{$node2}) ) {
		
		$comm = $node3;
		$NTAX = $$NODE{$node3}{"NUM"};
		
		@TAXA = split(/,/, $$NODE{$node3}{"CLADE"});
	}
	
	##################################
	
	elsif ( (exists $$NODE{$node1}{$node3}) || (exists $$NODE{$node2}{$node3}) ) {
		
		$comm = join(",", sort($node1, $node2));
		$NTAX = $$NODE{$node1}{"NUM"} + $$NODE{$node2}{"NUM"};
		
		my $clade = join(",", $$NODE{$node1}{"CLADE"},$$NODE{$node2}{"CLADE"});
		
		@TAXA = split(/,/, $clade);
		@TAXA = sort @TAXA;
	}
	elsif ( (exists $$NODE{$node1}{$node2}) || (exists $$NODE{$node3}{$node2}) ) {
		
		$comm = join(",", sort($node1, $node3));
		$NTAX = $$NODE{$node1}{"NUM"} + $$NODE{$node3}{"NUM"};
		
		my $clade = join(",", $$NODE{$node1}{"CLADE"},$$NODE{$node3}{"CLADE"});
		
		@TAXA = split(/,/, $clade);
		@TAXA = sort @TAXA;
	}
	elsif ( (exists $$NODE{$node2}{$node1}) || (exists $$NODE{$node3}{$node1}) ) {
		
		$comm = join(",", sort($node2, $node3));
		$NTAX = $$NODE{$node2}{"NUM"} + $$NODE{$node3}{"NUM"};
		
		my $clade = join(",", $$NODE{$node2}{"CLADE"},$$NODE{$node3}{"CLADE"});
		
		@TAXA = split(/,/, $clade);
		@TAXA = sort @TAXA;
	}
	
	##################################
		
	else {
		
		$comm = join(",", sort($node1, $node2, $node3));
		$NTAX = $$NODE{$node1}{"NUM"} + $$NODE{$node2}{"NUM"} + $$NODE{$node3}{"NUM"};
		
		my $clade = join(",", $$NODE{$node1}{"CLADE"},$$NODE{$node2}{"CLADE"},$$NODE{$node3}{"CLADE"});
		
		@TAXA = split(/,/, $clade);
		@TAXA = sort @TAXA;
	}
	
		
	next if $NTAX < ($C1 + $C2 + $C3);
	next if exists $TEMP{$comm};
	
	$TEMP{$comm} = 1;
	
	
	##################################
	
	
	if ( $NTAX <= ($C1 + $C2 + $C3 + 4) ) {
		$NR = 1;
	}
	elsif ( $NTAX <= ($C1 + $C2 + $C3 + 8) ) {
		$NR = 1;
	}
	elsif ( $NTAX <= ($C1 + $C2 + $C3 + 16) ) {
		$NR = 1;
	}
	elsif ( $NTAX <= ($C1 + $C2 + $C3 + 32) ) {
		$NR = 2;
	}
	else {
		$NR = 4;
	}
	
	
	##################################
	
	
	my $nr = 0;
	my $start = time();
	
	
	while (1) {
		
		my $current = time();
		last if ($current - $start) > 10;
		
		
		my (%C1, %C2, %C3);
		
		while (1) {
			
			my $temp = $TAXA[rand @TAXA];
			next if exists $C1{$temp};
			
			$C1{$temp} = 1;
			last if scalar(keys %C1) == $C1;
		}
		while (1) {
			
			my $temp = $TAXA[rand @TAXA];
			
			next if exists $C1{$temp};
			next if exists $C2{$temp};
			
			$C2{$temp} = 1;
			last if scalar(keys %C2) == $C2;
		}
		while (1) {
			
			my $temp = $TAXA[rand @TAXA];
			
			next if exists $C1{$temp};
			next if exists $C2{$temp};
			next if exists $C3{$temp};
			
			$C3{$temp} = 1;
			last if scalar(keys %C3) == $C3;
		}
		
		
		##################################
		
		
		my $n = 0;
		
		foreach my $node (@NODE) {
			
			next if ($$NODE{$node}{"NUM"} < $C1) && ($$NODE{$node}{"NUM"} < $C2) && ($$NODE{$node}{"NUM"} < $C3);
			my @TEMP = split(/,/, $$NODE{$node}{"CLADE"});
			
			my %C;
			foreach (@TEMP) { $C{$_} = 1; }
			
			
			my ($k, $l, $m) = (0, 0, 0);
			
			foreach (keys %C1) { $k++ if exists $C{$_}; }
			foreach (keys %C2) { $l++ if exists $C{$_}; }
			foreach (keys %C3) { $m++ if exists $C{$_}; }
			
			
			if ( ($k == $C1) && ($l == 0) && ($m == 0) ) {
				$n = 1;
				last;
			}
			elsif ( ($k == 0) && ($l == $C2) && ($m == 0) ) {
				$n = 1;
				last;
			}
			elsif ( ($k == 0) && ($l == 0) && ($m == $C3) ) {
				$n = 1;
				last;
			}
		}
		
		next if $n == 1;
		
		
		##################################
		
		
		my @C1 = sort keys %C1;
		my @C2 = sort keys %C2;
		my @C3 = sort keys %C3;
		
		$comm = join(",", sort(@C1, @C2, @C3));
		
		
		next if exists $ALL{$comm};
		next if exists $COMM{$comm};
		
		
		##################################
		
		
		$nr++;
		
		$COMM{$comm}{"C1"} = join(",", @C1);
		$COMM{$comm}{"C2"} = join(",", @C2);
		$COMM{$comm}{"C3"} = join(",", @C3);
		
		
		$COMM{$comm}{"c1"} = &MPD1($COMM{$comm}{"C1"}, $DIST) / $MPD;
		$COMM{$comm}{"c2"} = &MPD1($COMM{$comm}{"C2"}, $DIST) / $MPD;
		$COMM{$comm}{"c3"} = &MPD1($COMM{$comm}{"C3"}, $DIST) / $MPD;
		
		$COMM{$comm}{"c1"} = nearest(1e-6, $COMM{$comm}{"c1"});
		$COMM{$comm}{"c2"} = nearest(1e-6, $COMM{$comm}{"c2"});
		$COMM{$comm}{"c3"} = nearest(1e-6, $COMM{$comm}{"c3"});
		
		$COMM{$comm}{"c1"} = sprintf("%.6f", $COMM{$comm}{"c1"});
		$COMM{$comm}{"c2"} = sprintf("%.6f", $COMM{$comm}{"c2"});
		$COMM{$comm}{"c3"} = sprintf("%.6f", $COMM{$comm}{"c3"});
		
		
		$COMM{$comm}{"c1c2"} = &MPD2($DIST, \@C1, \@C2) / $MPD;
		$COMM{$comm}{"c2c3"} = &MPD2($DIST, \@C2, \@C3) / $MPD;
		$COMM{$comm}{"c3c1"} = &MPD2($DIST, \@C3, \@C1) / $MPD;
		
		$COMM{$comm}{"c1c2"} = nearest(1e-6, $COMM{$comm}{"c1c2"});
		$COMM{$comm}{"c2c3"} = nearest(1e-6, $COMM{$comm}{"c2c3"});
		$COMM{$comm}{"c3c1"} = nearest(1e-6, $COMM{$comm}{"c3c1"});
		
		$COMM{$comm}{"c1c2"} = sprintf("%.6f", $COMM{$comm}{"c1c2"});
		$COMM{$comm}{"c2c3"} = sprintf("%.6f", $COMM{$comm}{"c2c3"});
		$COMM{$comm}{"c3c1"} = sprintf("%.6f", $COMM{$comm}{"c3c1"});
		
		
		$COMM{$comm}{"MPD"} = ($COMM{$comm}{"c1"} + $COMM{$comm}{"c2"} + $COMM{$comm}{"c3"}) * ($COMM{$comm}{"c1c2"} + $COMM{$comm}{"c2c3"} + $COMM{$comm}{"c3c1"}) / 9;
		
		$COMM{$comm}{"MPD"} = nearest(1e-6, $COMM{$comm}{"MPD"});
		$COMM{$comm}{"MPD"} = sprintf("%.6f", $COMM{$comm}{"MPD"});
		
		last if $nr == $NR;
	}
	
	print scalar(keys %TEMP), "\n";
	last if scalar(keys %TEMP) == $N;
}


####################################################################################################


my @COMM = sort { $COMM{$a}{"MPD"} <=> $COMM{$b}{"MPD"} } keys %COMM;

print "NCOMM: ", scalar(@COMM), "\n";
print "DIST min: ", $COMM{$COMM[0]}{"MPD"}, "\nDIST max: ", $COMM{$COMM[-1]}{"MPD"}, "\n\n";


$N = int(scalar(@COMM) / 10);
my %OUT;


for (my $i = 0; $i < 10; $i++) {
	
	my $n = 0;
	
	while (1) {
		
		my $m = int(rand $N) + $N * $i;
		next if exists $OUT{$m};
		
		$n++;
		$OUT{$m} = 1;
		
		last if $n == 60;
	}
}


####################################################################################################


my $i = 0;

open FH1, ">>", "$basename-${C1}_${C2}_$C3.txt";
open FH2, ">", "$basename.community.txt";


$tree =~ s/ : [Ee.\d\-]+ //gx;	# branch_length
open FH3, ">", "$basename.community.tre";


foreach my $m (sort { $a <=> $b } keys %OUT) {
	
	$i++;
	
	my $comm = $COMM[$m];
	print FH1 "D\t$comm\n";
	
	
	print FH2 ">$basename-D${C1}_${C2}_$C3-$i ", $COMM{$comm}{"MPD"}, " c1=",$COMM{$comm}{"c1"}, ",c2=",$COMM{$comm}{"c2"}, ",c3=",$COMM{$comm}{"c3"}, ",c1c2=",$COMM{$comm}{"c1c2"}, ",c2c3=",$COMM{$comm}{"c2c3"}, ",c3c1=",$COMM{$comm}{"c3c1"}, "\n";
	print FH2 "c1=", $COMM{$comm}{"C1"}, ", c2=", $COMM{$comm}{"C2"}, ", c3=", $COMM{$comm}{"C3"}, "\n";
	
	
	##################################
	
	
	my $temp = $tree;
	
	my @TEMP = split(/,/, $COMM{$comm}{"C1"});
	foreach my $taxon (@TEMP) { $temp =~ s/\b$taxon\b/\[$taxon-c1]/; }
	
	
	@TEMP = split(/,/, $COMM{$comm}{"C2"});
	foreach my $taxon (@TEMP) { $temp =~ s/\b$taxon\b/\[$taxon-c2]/; }
	
	
	@TEMP = split(/,/, $COMM{$comm}{"C3"});
	foreach my $taxon (@TEMP) { $temp =~ s/\b$taxon\b/\[$taxon-c3]/; }
	
	
	print FH3 "$temp\n";
}

close FH1;
close FH2;

close FH3;



####################################################################################################
####################################################################################################
# Cophenetic (ape: Analyses of Phylogenetics and Evolution)
#
# Require: single Newick tree
#
# Output: hash reference DIST
#


sub Cophenetic {
	
	
	my $tree = shift;	
	die "\nIncorrect <\$tree> value for <sub Cophenetic>!\n\n" unless (defined $tree) && (length($tree) > 0);
	
	my ($TREE, $root) = &Read_Newick($tree);
	
	
	##################################
	
	
	my (%DIST, %BRANCH);
	my $branch;
	
	
	foreach my $node (sort { $$TREE{$a}[1] <=> $$TREE{$b}[1] } keys %$TREE) {
		
		next if $$TREE{$node}[1] == 1;
		
#	%TREE: {ancestor, #descendants, clade, branch_length, node_label, descendant1, descendant2, ....}
#			0			1			2			3			4			5			6
#		   {ancestor, #descendants, clade, branch_length}
		
		
		for (my $i = 5; $i < scalar(@{$$TREE{$node}}); $i++) {
			
			my $descendant = $$TREE{$node}[$i];
			
			
			if ($$TREE{$descendant}[3] == -1) {
				$branch = 0;
			}
			else {
				$branch = $$TREE{$descendant}[3];
			}
			
			
			if ($descendant =~ /^#/) {
				
				foreach my $taxon (keys %{$BRANCH{$descendant}}) {
					$BRANCH{$node}{$taxon} = $BRANCH{$descendant}{$taxon} + $branch;
				}
			}
			else {
				$BRANCH{$node}{$descendant} = $branch;
			}
		}
		
		
		##################################
		
		
		my @CLADE = split(/,/, $$TREE{$node}[2]);
		
		
		foreach my $taxon1 (@CLADE) {
			foreach my $taxon2 (@CLADE) {
				
				next if $taxon1 eq $taxon2;
				next if exists $DIST{$taxon1}{$taxon2};
				
				$DIST{$taxon1}{$taxon2} = $BRANCH{$node}{$taxon1} + $BRANCH{$node}{$taxon2};
			}
		}
	}
	
	
	##################################
	
	
	return \%DIST;
}




####################################################################################################
####################################################################################################
# Drop_Tips
#
# Require: single Newick tree && hash reference TIP
#
# Output: single Newick tree
#


sub Drop_Tips {
	
	
	my ($tree, $TIP) = @_;
	
	die "\nIncorrect <\$tree> value for <sub Drop_Tips>!\n\n" unless (defined $tree) && (length($tree) > 0);
	die "\nIncorrect <\$TIP> value for <sub Drop_Tips>!\n\n" unless (defined $TIP) && (length($TIP) > 0);
		
	
	my ($TREE, $root) = &Read_Newick($tree);
	my (%TIP, @DROP, @TEMP, @TEMP1, $ancestor, $descendant, $clade, $taxa, $i);
	
	
	@TEMP = split(/,/, $TIP);
	foreach (@TEMP) { $TIP{$_} = 1; }
	
	
	##################################
	
	
#	%TREE: {ancestor, #descendants, clade, branch_length, node_label, descendant1, descendant2, ....}
#			0			1			2			3			4			5			6
#		   {ancestor, #descendants, clade, branch_length}
	
	
	foreach my $node (sort { $$TREE{$a}[1] <=> $$TREE{$b}[1] } keys %$TREE) {
		
		next if $node eq $root;
		next if $$TREE{$node}[1] == 1;
		
		##################################
		
		if (scalar(@{$$TREE{$node}}) == 5) {
			$ancestor = $$TREE{$node}[0];
			
			for ($i = 5; $i < scalar(@{$$TREE{$ancestor}}); $i++) {
				
				$descendant = $$TREE{$ancestor}[$i];
				next unless $descendant eq $node;
				
				
				splice(@{$$TREE{$ancestor}}, $i, 1);
				last;
			}
			
			delete $$TREE{$node};
		}
		
		##################################
		
		else {
			@TEMP = ();
			
			for ($i = 5; $i < scalar(@{$$TREE{$node}}); $i++) {
				
				$descendant = $$TREE{$node}[$i];
				
				if ($descendant =~ /^#/) {
					push @TEMP, $descendant;
					next;
				}
				
				##################################
				
				if (exists $TIP{$descendant}) {
					
					push @DROP, $descendant;
					delete $$TREE{$descendant};
				}
				else {
					push @TEMP, $descendant;
				}
			}
			
			splice(@{$$TREE{$node}}, 5);
			push @{$$TREE{$node}}, @TEMP if @TEMP;
			
			##################################
			
			if (scalar(@{$$TREE{$node}}) == 5) {
				$ancestor = $$TREE{$node}[0];
				
				for ($i = 5; $i < scalar(@{$$TREE{$ancestor}}); $i++) {
					
					$descendant = $$TREE{$ancestor}[$i];
					next unless $descendant eq $node;
					
					splice(@{$$TREE{$ancestor}}, $i, 1);
					last;
				}
				
				delete $$TREE{$node};
			}
			
			##################################
			
			elsif (scalar(@{$$TREE{$node}}) == 6) {
				$ancestor = $$TREE{$node}[0];
				
				for ($i = 5; $i < scalar(@{$$TREE{$ancestor}}); $i++) {
					
					$descendant = $$TREE{$ancestor}[$i];
					next unless $descendant eq $node;
					
					last;
				}
				
				##################################
				
				$descendant = $$TREE{$node}[5];
				
				if ($$TREE{$node}[3] != -1) {
					if ($$TREE{$descendant}[3] != -1) {
						
						$$TREE{$descendant}[3] += $$TREE{$node}[3];
					}
					else {
						$$TREE{$descendant}[3] = $$TREE{$node}[3];
					}
				}
				
				##################################
				
				splice(@{$$TREE{$ancestor}}, $i, 1, $descendant);
				
				delete $$TREE{$node};
			}
			
#	%TREE: {ancestor, #descendants, clade, branch_length, node_label, descendant1, descendant2, ....}
#			0			1			2			3			4			5			6
#		   {ancestor, #descendants, clade, branch_length}
			
			else {
				@TEMP1 = ();
				
				for ($i = 5; $i < scalar(@{$$TREE{$node}}); $i++) {
					
					@TEMP = split(/,/, $$TREE{$node}[2]);
					push @TEMP1, @TEMP;
				}
				
			 	@TEMP1 = sort @TEMP1;
				$clade = join(",", @TEMP1);
				
				##################################
				
				($$TREE{$node}[1], $$TREE{$node}[2]) = (scalar(@TEMP1), $clade);
			}
		}
	}
	
	##################################
	##################################
	
	@TEMP = ();
	
	for ($i = 5; $i < scalar(@{$$TREE{$root}}); $i++) {
		$descendant = $$TREE{$root}[$i];
		
		if ($descendant =~ /^#/) {
			push @TEMP, $descendant;
			next;
		}
		
		##################################
		
		if (exists $TIP{$descendant}) {
			
			push @DROP, $descendant;
			delete $$TREE{$descendant};
		}
		else {
			push @TEMP, $descendant;
		}
	}
	
	splice(@{$$TREE{$root}}, 5);
	push @{$$TREE{$root}}, @TEMP if @TEMP;
	
	##################################
	
	if (scalar(@{$$TREE{$root}}) == 5) {
		$tree = "";
	}
	elsif (scalar(@{$$TREE{$root}}) == 6) {
		
		while (1) {
			$descendant = $$TREE{$root}[5];
			
			delete $$TREE{$root};
			$root = $descendant;
			
			##################################
			
			if (scalar(@{$$TREE{$root}}) == 4) {
				
				$tree = "";
				last;
			}
			elsif (scalar(@{$$TREE{$root}}) > 6) {
				
				($$TREE{$root}[0], $$TREE{$root}[3]) = ("#root", -1);
				$tree = &Write_Newick($TREE, $root);
				
				last;
			}
		}
	}
	
#	%TREE: {ancestor, #descendants, clade, branch_length, node_label, descendant1, descendant2, ....}
#			0			1			2			3			4			5			6
#		   {ancestor, #descendants, clade, branch_length}
	
	else {
		@TEMP1 = ();
		
		for ($i = 5; $i < scalar(@{$$TREE{$root}}); $i++) {
			
			@TEMP = split(/,/, $$TREE{$root}[2]);
			push @TEMP1, @TEMP;
		}
		
		@TEMP1 = sort @TEMP1;
		$clade = join(",", @TEMP1);
		
		##################################
		
		($$TREE{$root}[1], $$TREE{$root}[2]) = (scalar(@TEMP1), $clade);
		$tree = &Write_Newick($TREE, $root);
	}
	
	##################################
	##################################
	
#	if (@DROP) {
#		print "\n";
#		
#		foreach (sort @DROP) {
#			print "$_\n";
#		}
#		print "\n";
#	}
	
	return $tree;
}




####################################################################################################
####################################################################################################
# Get_Nodes
#
# Require: single Newick tree
#
# Output: hash reference NODE && ROOT
#


sub Get_Nodes {
	
	
	my $tree = shift;
	die "\nIncorrect <\$tree> value for <sub Get_Nodes>!\n\n" unless (defined $tree) && (length($tree) > 0);
	
	my ($TREE, $root) = &Read_Newick($tree);
	
	
	##################################
	
	
	my %NODE;
	
	
	foreach my $node (sort { $$TREE{$a}[1] <=> $$TREE{$b}[1] } keys %$TREE) {
		
		next if $$TREE{$node}[1] == 1;
		
		$NODE{$node}{"NUM"} = $$TREE{$node}[1];
		$NODE{$node}{"CLADE"} = $$TREE{$node}[2];
		
		
#	%TREE: {ancestor, #descendants, clade, branch_length, node_label, descendant1, descendant2, ....}
#			0			1			2			3			4			5			6
#		   {ancestor, #descendants, clade, branch_length}
		
		
		for (my $i = 5; $i < scalar(@{$$TREE{$node}}); $i++) {
			
			my $descendant = $$TREE{$node}[$i];
			next unless $descendant =~ /^#/;
			
			$NODE{$node}{$descendant} = -1;
			
			
			foreach my $node1 (keys %{$NODE{$descendant}}) {
				
				next if $node1 eq "NUM";
				next if $node1 eq "CLADE";
				
				$NODE{$node}{$node1} = -1;
			}
		}
	}
	
	
	##################################
	
	
	return (\%NODE, $root);
}




####################################################################################################
####################################################################################################
# Get_Tips
#
# Require: single Newick tree
#
# Output: all tips
#


sub Get_Tips {
	
	
	my $tree = shift;
	die "\nIncorrect <\$tree> value for <sub Get_Tips>!\n\n" unless (defined $tree) && (length($tree) > 0);
	
	
	$tree =~ s/;$//;
	$tree =~ s/ : [Ee.\d\-]+ //gx;	# branch_length
	$tree =~ s/ \) [^;,)\s]+ /)/gx;	# node_label
	$tree =~ s/[()]//g;
	
	
	my @TIPS = split(/,/, $tree);
	@TIPS = sort @TIPS;
	
	
	my $tips = join(",", @TIPS);
	return $tips;
}




####################################################################################################
####################################################################################################
# Label_Tree
#
# Require: single Newick tree
#
# Output: single Newick tree
#


sub Label_Tree {
	
	
	my $tree = shift;
	die "\nIncorrect <\$tree> value for <sub Keep_Tips>!\n\n" unless (defined $tree) && (length($tree) > 0);	
	
	my ($TREE, $root) = &Read_Newick($tree);
	
	
	##################################
	
	
#	%TREE: {ancestor, #descendants, clade, branch_length, node_label, descendant1, descendant2, ....}
#			0			1			2			3			4			5			6
#		   {ancestor, #descendants, clade, branch_length}
	
	
	foreach my $node (keys %$TREE) {
		
		next if $$TREE{$node}[1] == 1;
		
		$node =~ /(\d+)/;
		$$TREE{$node}[4] = $1;
	}
	
	
	$tree = &Write_Newick($TREE, $root);
	return $tree;
}




####################################################################################################
####################################################################################################
# MPD: mean phylogenetic distance
#
# Require: array reference LIST && hash reference DIST
#
# Output: MPD
#


sub MPD1 {
	
	
	my ($LIST, $DIST) = @_;	
	
	
	die "\nIncorrect <\$LIST> value for <sub MPD>!\n\n" unless (defined $LIST) && (length($LIST) > 0);
	die "\nIncorrect <\$DIST> value for <sub MPD>!\n\n" unless (defined $DIST) && (ref($DIST) eq "HASH") && (scalar(keys %$DIST) > 0);
	
	
	my @TAXA = sort split(/,/, $LIST);
	
	
	##################################
	
	
	my ($MPD, $n) = (0, 0);
	
	
	foreach my $taxon1 (@TAXA) {
		foreach my $taxon2 (@TAXA) {
			
			last if $taxon1 eq $taxon2;
			
			$n++;
			$MPD += $$DIST{$taxon1}{$taxon2};
		}
	}
	
	
	##################################
	
	
	$MPD /= $n;
	
	$MPD = nearest(1e-8, $MPD);
	$MPD = sprintf("%.8f", $MPD);
	
	
	return $MPD;
}




sub MPD2 {
	
	my ($DIST, $C1, $C2) = @_;
	my $C1_C2 = 0;
	
	
	foreach my $taxon1 (@$C1) {
		foreach my $taxon2 (@$C2) {
			
			$C1_C2 += $$DIST{$taxon1}{$taxon2};
		}
	}
	
	$C1_C2 /= ( scalar(@$C1) * scalar(@$C2) );
	
	
	##################################
	
	
	$C1_C2 = nearest(1e-8, $C1_C2);
	$C1_C2 = sprintf("%.8f", $C1_C2);
	
	
	return $C1_C2;
}




####################################################################################################
####################################################################################################
# Read_File
#
# Require: Newick tree file
#
# Output: array reference TREES
#


sub Read_File {
	
	
	my ($INFILE, $ntree, $outgroup);
	
	
	if (scalar(@_) == 1) {
		
		$INFILE = shift;
		($ntree, $outgroup) = (0, "");
	}
	elsif (scalar(@_) == 2) {
		($INFILE, $ntree) = @_;
		
		if ($ntree =~ /^\d+$/) {
			$outgroup = "";
		}
		else {
			($ntree, $outgroup) = (0, $ntree);
		}
	}
	else {
		($INFILE, $ntree, $outgroup) = @_;
	}
	
	
	die "\nIncorrect <\$INFILE> value for <sub Read_File>!\n\n" unless (defined $INFILE) && (length($INFILE) > 0);
	die "\nCannot find input Newick tree file <$INFILE>!\n\n" unless -e $INFILE;
	
	die "\nIncorrect <\$ntree> value for <sub Read_File>!\n\n" unless (defined $ntree) && (length($ntree) > 0) && ($ntree =~ /^\d+$/);
	die "\nIncorrect <\$outgroup> value for <sub Read_File>!\n\n" unless defined $outgroup;
	
	##################################
	
	my @TREE;
	
	
	open FH, "<", $INFILE;
	while (<FH>) {
		next if /^\s*$/;
		
		chomp;
		push @TREE, $_;

	}
	close FH;
	die "\nIncorrect Newick tree file <$INFILE>!\n\n" unless @TREE;
	
	##################################
	
	splice(@TREE, 0, (scalar(@TREE) - $ntree)) if $ntree != 0;
	
	
	if ($outgroup ne "") {
		foreach my $tree (@TREE) {
			$tree = &Root($tree, $outgroup);
		}
	}
	
	##################################
	
	if (scalar(@TREE) == 1) {
		return $TREE[0];
	}
	else {
		return \@TREE;
	}
}




####################################################################################################
####################################################################################################
# Read_Newick
#
# Require: single Newick tree
#
# Output: hash reference TREE && ROOT
#


sub Read_Newick {
	
	
	my $tree = shift;
	die "\nIncorrect <\$tree> value for <sub Read_Newick>!\n\n" unless (defined $tree) && (length($tree) > 0);
	
	
	$tree =~ s/;$//;
	
	
	my (%TREE, @CLADE, $clade, $node, $taxa, $branch_length);
	
	
#	%TREE: {ancestor, #descendants, clade, branch_length, node_label, descendant1, descendant2, ....}
#			0			1			2			3			4			5			6
#		   {ancestor, #descendants, clade, branch_length}
	
	
	my $nnode = 0;
	
	while (1) {
		# standard Newick tree
		@CLADE = $tree =~ / ( \( [^()\s]+? \) [^,:;\s)]* :? [Ee.\d\-]* ) /gx;
		
		############################################################################################
		
		last if scalar(@CLADE) == 0;
		
		
		foreach $clade (@CLADE) {
			$nnode++;
			
			##################################
#			#descendants, clade, branch_length, node_label
#			1				2		3				4
			
			if ($clade =~ /\) ([^:\s]+?) : ([Ee.\d\-]+) /x) {
				
				$TREE{"#$nnode"} = [ (0, "", $2, $1) ];
			}
			elsif ($clade =~ /\) ([^:\s]+) /x) {
				
				$TREE{"#$nnode"} = [ (0, "", -1, $1) ];
			}
			elsif ($clade =~ /\) : ([Ee.\d\-]+) /x) {
				
				$TREE{"#$nnode"} = [ (0, "", $1, -1) ];
			}
			else {
				$TREE{"#$nnode"} = [ (0, "", -1, -1) ];
			}
			
			##################################
			
			$tree =~ s/\Q$clade\E/#$nnode/;
			
			
			$clade =~ s/ \( //x;
			$clade =~ s/  \) [^)\s]* //x;
			
			##################################
			
			my @TEMP = split(/,/, $clade);
			
			foreach $node (@TEMP) {
				
				($taxa, $branch_length) = ($node =~ /^ (\S+?) : (\S+) /x) ? ($1, $2) : ($node, -1);
				
				##################################
				
				if ((exists $TREE{$taxa}) && (scalar(@{$TREE{$taxa}}) == 4)) {
					die "\nDuplicate taxon <$taxa>!\n\n";
				}
				elsif (exists $TREE{$taxa}) {
					unshift @{$TREE{$taxa}}, "#$nnode";
				}
				else {
					$TREE{$taxa} = [ ("#$nnode", 1, $taxa, $branch_length) ];
				}
				
				##################################
	
#	%TREE: {ancestor, #descendants, clade, branch_length, node_label, descendant1, descendant2, ....}
#			0			1			2			3			4			5			6
#		   {ancestor, #descendants, clade, branch_length}

				
				# clade
				if ($TREE{"#$nnode"}[1] eq "") {
					$TREE{"#$nnode"}[1] = $TREE{$taxa}[2];
				}
				else {
					my @TEMP1 = split(/,/, $TREE{"#$nnode"}[1]);
					my @TEMP2 = split(/,/, $TREE{$taxa}[2]);
					
					@TEMP1 = sort (@TEMP1, @TEMP2);
					
					my $temp1 = join(",", @TEMP1);
					$TREE{"#$nnode"}[1] = $temp1;
				}
				
				
				# descendant
				push @{$TREE{"#$nnode"}}, $taxa;
				
				
				# #descendants
				$TREE{"#$nnode"}[0] += $TREE{$taxa}[1];
			}
		}
	}
	
	die "\nIncorrect <\$tree> value for <sub Read_Newick>!\n\n" unless %TREE;
	
	unshift @{$TREE{"#$nnode"}}, "#root";
	
	return (\%TREE, "#$nnode");
}




####################################################################################################
####################################################################################################
# Write_Newick
#
# Require: hash reference TREE && ROOT
#
# Output: single Newick tree
#


sub Write_Newick {
	
	
	my ($TREE, $root) = @_;
	
	die "\nIncorrect <\$TREE> value for <sub Write_Newick>!\n\n" unless (defined $TREE) && (ref($TREE) eq "HASH") && (scalar(keys %$TREE) > 0);
	die "\nIncorrect <\$root> value for <sub Write_Newick>!\n\n" unless (defined $root) && ($root =~ /^#\d+$/);
	
	
	my %TREE = %$TREE;
	my ($node, $clade, $i);
	
	
	my $tree = "(";
	
	for ($i = 5; $i < scalar(@{$TREE{$root}}); $i++) {
		$tree .= "$TREE{$root}[$i],";
	}
	
	$tree =~ s/,$/)$TREE{$root}[4]:$TREE{$root}[3];/;
	delete $TREE{$root};
	
	
	##################################
#	%TREE: {ancestor, #descendants, clade, branch_length, node_label, descendant1, descendant2, ....}
#			0			1			2			3			4			5			6
#		   {ancestor, #descendants, clade, branch_length}
	
	
	while (1) {
		last if scalar(keys %TREE) == 0;
		
		foreach $node (keys %TREE) {
			next unless $tree =~ /[(,]\Q$node\E[,)]/;
			
			##################################
			
			
			if (scalar(@{$TREE{$node}}) > 4) {				
				
				$clade = "(";
				
				for ($i = 5; $i < scalar(@{$TREE{$node}}); $i++) {
					$clade .= "$TREE{$node}[$i],";
				}
				
				$clade =~ s/,$/)$TREE{$node}[4]:$TREE{$node}[3]/;
			}
			else {
				$clade = $node . ":" . $TREE{$node}[3];
			}
			
			##################################
			
			
			$tree =~ s/([(,])\Q$node\E([,)])/$1$clade$2/;
			delete $TREE{$node};
		}
	}
	
	##################################
	
	$tree =~ s/\)-1/)/g;
	$tree =~ s/:-1//g;
	
	return $tree;
}




