##Script to translate a table in a .txt file (example of the format of .txt file is provided in ExampleTaxonomy.txt) to a tree structure in ape

##read the taxonomy file into a data frame using read.table (a character matrix with the same layout also works); this table is the only input needed

##output is an ape format tree, unrooted with polytomies

## tax2tree: convert a taxonomy table into an ape tree.
##
## Argument:
##   table - matrix or data frame with one row per tip and one column per
##           taxonomic level. Columns to the left are treated as the parents
##           of columns to the right, and the last column holds the tip names.
##           Cells may be NA; an NA tip name is replaced by the entry in the
##           second-to-last column. Each row is only compared with the rows
##           directly above and below it, so rows of the same taxon are
##           assumed to be adjacent, and tip names are assumed to be unique
##           (they are located in the working vector by exact match).
## Value:
##   The object returned by ape's read.tree() for the generated Newick string.
## Errors:
##   Stops if `table` is not two-dimensional, or has fewer than two rows or
##   fewer than two columns.
tax2tree <- function(table) {

    # Fail early with a clear message; degenerate input would otherwise make
    # the 2:nrow / 1:(ncol - 1) sequences below count backwards.
    if (length(dim(table)) != 2L) {
        stop("`table` must be a matrix or data frame", call. = FALSE)
    }
    if (ncol(table) < 2L) {
        stop("`table` must have at least two columns", call. = FALSE)
    }
    if (nrow(table) < 2L) {
        stop("`table` must have at least two rows", call. = FALSE)
    }

    # Attach ape as the original script did (callers may rely on its functions
    # being on the search path). library() raises an error when ape is not
    # installed, whereas require() only warned and returned FALSE.
    library(ape)

    # Work on a plain character matrix: factors become their labels and every
    # cell lookup below returns a single string (or NA).
    if (is.data.frame(table)) {
        table[] <- lapply(table, as.character)
    }
    table <- as.matrix(table)
    storage.mode(table) <- "character"

    n_row <- nrow(table)
    n_col <- ncol(table)

    # If a tree including families as well as genera as tips is required,
    # a missing tip name is filled in from the column to its left.
    missing_tip <- is.na(table[, n_col])
    table[missing_tip, n_col] <- table[missing_tip, n_col - 1]

    # Starting vector for the Newick string: tip names separated by commas
    # (tip1 "," tip2 "," ... tipN).
    string <- as.vector(rbind(unname(table[, n_col]), ","))
    string <- string[-length(string)]

    # Position in `string` of the tip on the first row that carries the same
    # name as cell [row, col], i.e. where that taxon's group of tips begins.
    first_tip_pos <- function(row, col) {
        first_row <- match(table[row, col], table[, col])
        which(string == table[first_row, n_col])
    }

    # Nearest column to the left of `col` that is not NA on `row`;
    # returns 0 when there is none.
    parent_col <- function(row, col) {
        p <- col - 1
        while (p >= 1 && is.na(table[row, p])) {
            p <- p - 1
        }
        p
    }

    for (k in seq_len(n_col - 1)) {    # runs through the columns of the table
        for (i in 2:n_row) {           # runs through the rows of the table
            here <- table[i, k]
            above <- table[i - 1, k]
            if (is.na(here) || is.na(above)) {
                next
            }

            # Only the last row of a run of identical names is acted on.
            is_last <- i == n_row
            if (is_last) {
                # The last row of the table has no row below it: it closes a
                # run only if it continues the name in the row above.
                if (here != above) {
                    next
                }
                end <- length(string)
            } else {
                below <- table[i + 1, k]
                # NOTE(review): a run that is followed by an NA cell is
                # skipped here and so never gets its own parentheses. This
                # matches the original behaviour; confirm it is intended.
                if (is.na(below) || here == below || here != above) {
                    next
                }
                end <- which(string == table[i, n_col])
            }

            start <- first_tip_pos(i, k)

            # Skip the group when it covers exactly the same tips as its
            # nearest non-NA parent taxon (same first tip, and the parent
            # also ends on this row); the parent's parentheses already
            # delimit it. With no parent column the group is always wrapped.
            p <- parent_col(i, k)
            if (p >= 1) {
                parent_ends_here <- is_last ||
                    is.na(table[i + 1, p]) ||
                    table[i, p] != table[i + 1, p]
                if (parent_ends_here && start == first_tip_pos(i, p)) {
                    next
                }
            }

            # Insert the closing parenthesis first so that `start` is still a
            # valid position for the opening one.
            string <- append(string, ")", after = end)
            string <- append(string, "(", after = start - 1)
        }
    }

    # Collapse the pieces into one Newick string and let ape parse it.
    newick <- paste(c(string, ";"), collapse = "")
    read.tree(text = newick)
}