## Script to translate a taxonomy table stored in a .txt file (an example of
## the expected format is provided in ExampleTaxonomy.txt) into a tree
## structure in ape.
## Read the taxonomy file in with read.table; that table is the only input
## needed.
## Output is an ape format tree, unrooted with polytomies.
##
## Argument:
##   table  Matrix or data frame with one row per tip and one column per
##          taxonomic level. The last column supplies the tip names; every
##          other column is used, from left to right, to group adjacent rows
##          that share the same value. At least two rows and two columns are
##          required. Where the last column is NA the value of the previous
##          column is used as the tip name instead (e.g. a family standing in
##          for a missing genus).
##
## Value:
##   The object returned by ape::read.tree() for the generated Newick string.
##
## Notes:
##   * Rows belonging to the same taxon must be adjacent: groups are detected
##     by comparing each row with the rows directly above and below it.
##   * A run of equal values is only grouped when the cells directly above and
##     below the row that ends the run are non-NA in that column.
##   * Tip names are located by exact match inside the token vector, so they
##     are assumed to be unique.
##   * Column 1 is expected to hold a single taxon shared by all rows; that
##     group supplies the outermost pair of parentheses of the Newick string.
##   * ape is attached to the search path as a side effect.
tax2tree <- function(table) {
  # Validate the shape first: the row/column loops below need at least two
  # rows and two columns, and would otherwise fail with obscure errors.
  if (is.null(dim(table)) || nrow(table) < 2L || ncol(table) < 2L) {
    stop("`table` must be a matrix or data frame with at least two rows ",
         "and two columns.", call. = FALSE)
  }

  # library() stops immediately when ape is missing (require() would only
  # return FALSE) and keeps ape attached for the caller, as before.
  library(ape)

  n_row <- nrow(table)
  n_col <- ncol(table)

  # Work on a plain character matrix so factors, tibbles and matrices all
  # behave the same way in the comparisons below.
  is_df <- is.data.frame(table)
  taxa <- vapply(
    seq_len(n_col),
    function(j) as.character(if (is_df) table[[j]] else table[, j]),
    character(n_row)
  )

  # If a tree including families as well as genera as tips is required, fill
  # missing tip names from the column to the left.
  missing_tip <- is.na(taxa[, n_col])
  taxa[missing_tip, n_col] <- taxa[missing_tip, n_col - 1L]

  # Starting token vector: tip names interleaved with commas. Parentheses are
  # inserted into it below until it spells a Newick string.
  tokens <- as.vector(rbind(taxa[, n_col], ","))
  tokens <- tokens[-length(tokens)]

  # Position in `tokens` of the tip on the first row whose column `col`
  # equals `value`, i.e. where the group named `value` starts.
  group_start <- function(tokens, value, col) {
    which(tokens == taxa[match(value, taxa[, col]), n_col])
  }

  # Nearest column to the left of `k` holding a non-NA value on row `i`;
  # 0 when there is none (including k == 1), meaning "no parent group".
  parent_col <- function(i, k) {
    filled <- which(!is.na(taxa[i, seq_len(k - 1L)]))
    if (length(filled) > 0L) max(filled) else 0L
  }

  for (k in seq_len(n_col - 1L)) {      # runs through the grouping columns
    for (i in seq.int(2L, n_row)) {     # runs through the rows of the table
      cur <- taxa[i, k]
      prev <- taxa[i - 1L, k]

      # Only a row that continues a run started above can close a group.
      if (is.na(cur) || is.na(prev) || cur != prev) {
        next
      }

      last_row <- i == n_row
      if (last_row) {
        # The last row always ends its run; close at the very end of the
        # token vector.
        end <- length(tokens)
      } else {
        nxt <- taxa[i + 1L, k]
        # Not the end of the run yet, or the run borders an NA cell.
        if (is.na(nxt) || cur == nxt) {
          next
        }
        end <- which(tokens == taxa[i, n_col])
      }

      start <- group_start(tokens, cur, k)

      # A group that starts and ends exactly where its parent group does has
      # already been parenthesised at the parent level, so skip it.
      p <- parent_col(i, k)
      same_start <- p > 0L && start == group_start(tokens, taxa[i, p], p)
      parent_ends_here <- same_start &&
        (last_row || is.na(taxa[i + 1L, p]) || taxa[i, p] != taxa[i + 1L, p])
      if (parent_ends_here) {
        next
      }

      # Insert the closing parenthesis first so `start` stays valid.
      tokens <- append(tokens, ")", after = end)
      tokens <- append(tokens, "(", after = start - 1L)
    }
  }

  # Collapse the tokens into one Newick string and let ape parse it.
  newick <- paste(c(tokens, ";"), collapse = "")
  ape::read.tree(text = newick)
}