
####################################################################################################
# load and install binary packages

# Install any packages that are missing, then attach every requested package.
#
# Args:
#   pkg: character vector of package names.
#
# Returns:
#   A logical vector named by package: TRUE where require() managed to attach
#   the package, FALSE otherwise. The result is always logical, including
#   for a zero-length `pkg`.
ipak <- function(pkg){
    new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
    if (length(new.pkg) > 0) install.packages(new.pkg, dependencies = TRUE)
    # vapply() pins the result type; sapply() would hand back a list for
    # empty input.
    vapply(pkg, require, logical(1), character.only = TRUE)
}

packages <- c("ape", "reshape", "adephylo", "Hmisc", "data.table")
ipak(packages)


# Number of random community draws taken from each clade. Clades that are
# only 0, 1 or 2 tips larger than the community size are drawn fewer times
# (2, 3*n and Rn/5 draws respectively) in the sampling loop further down.
Rn <- 10000  # number of replicates

####################################################################################################
# R CMD BATCH --no-save --no-restore '--args -phy=SP1.tre -ntax=30' 5.Community.R

# Read the two expected arguments: -phy=<tree file> and -ntax=<community size>.
args <- commandArgs(trailingOnly=TRUE)

tree <- sub("^-phy=", "", grep("^-phy=", args, value=TRUE))
n <- as.numeric(sub("^-ntax=", "", grep("^-ntax=", args, value=TRUE)))  # community size

# Fail early with a readable message instead of an obscure error further down
# when an argument is missing, duplicated or malformed.
if (length(tree) != 1 || !nzchar(tree)) {
	stop("expected exactly one '-phy=<tree file>' argument", call. = FALSE)
}
if (length(n) != 1 || is.na(n) || n < 1) {
	stop("expected exactly one '-ntax=<positive number>' argument", call. = FALSE)
}

# Strip only a literal trailing ".tre". The previous pattern ".tre" was an
# unanchored regex whose dot matched any character, so a name such as
# "mytree.tre" was turned into "me.tre".
basename <- sub("\\.tre$", "", tree)
phy <- read.tree(tree)


phy <- drop.tip(phy, "t1")  # drop outgroup
branch <- cophenetic.phylo(phy)  # pairwise tip-to-tip distances along the tree's branches

# Keep each pair once (upper triangle) and blank out zero distances, which
# includes the diagonal. Plain NA keeps the matrix numeric; assigning
# NA_character_ would coerce every distance to a string.
branch[lower.tri(branch)] <- NA
branch[which(branch == 0)] <- NA

# Long format, one row per retained pair. melt() is namespace-qualified
# because data.table is attached after reshape and masks its melt().
treemelt <- na.omit(reshape::melt(as.matrix(branch)))
colnames(treemelt) <- c("sp1", "sp2", "dist")


MPD.big <- mean(treemelt$dist)  # calculate MPD


####################################################################################################
## Generate Random Communities ##


# subset tree into clades >= n

# One entry per node as returned by listTips(); the entry's length is used as
# the number of tips below that node.
node.tip <- listTips(phy)

# Keep only the clades holding at least n tips. The result is built as a
# compact list: assigning node.comm[[i]] only for qualifying nodes would leave
# NULL holes wherever a small clade precedes a large one, and those holes make
# the later sample() call fail.
clade.ok <- vapply(node.tip, function(tips) length(tips) >= n, logical(1))

if (!any(clade.ok)) {
	stop("no clade in the tree has at least ", n, " tips", call. = FALSE)
}

# Each element is the attribute list of the tip vector; its `names` entry
# carries the tip labels that are sampled from later.
node.comm <- lapply(unname(node.tip[clade.ok]), function(tips) c(attributes(tips)))


# Create list of random draws from each clade
num <- list()

for (i in seq_along(node.comm)) {

	clade <- node.comm[[i]]
	clade.size <- length(clade$names)

	# Skip empty (NULL) slots and anything too small to supply n tips;
	# sample() without replacement would otherwise abort the whole run.
	if (clade.size < n) next

	# Number of draws depends on how many distinct communities the clade can
	# yield: very few when it is barely larger than the community itself.
	# (A disabled rule once used Rn/2 draws for clades smaller than a third
	# of the tree.)
	draws <- if (clade.size == n) {
		2
	} else if (clade.size == (n + 1)) {
		3 * n
	} else if (clade.size == (n + 2)) {
		Rn / 5
	} else {
		Rn
	}

	# Sorting each draw makes identical communities compare equal below.
	num[[i]] <- replicate(draws, sort(sample(clade[[1]], size=n, replace=FALSE)), simplify=FALSE)
	print(i)  # progress indicator
}

# combine elements of list and remove duplicates.
num <- unique(unlist(num, recursive=FALSE))



# Mean pairwise distance (MPD) within one community.
#
# Args:
#   branch: square distance matrix indexable by the entries of `num`; NA cells
#           are ignored. Values may be numeric or numbers stored as text.
#   num:    the taxa (row/column names or indices) that form the community.
#
# Returns:
#   A one-element list holding the mean of the non-NA cells of
#   branch[num, num] (NaN when no cell is available).
MPyDer <- function(branch, num) {

	pairs <- branch[c(num), c(num)]
	dists <- pairs[!is.na(pairs)]

	# Direct indexing replaces a melt()/na.omit() round trip per community.
	# Text is only parsed when the matrix was not numeric to begin with.
	if (!is.numeric(dists)) dists <- as.numeric(as.character(dists))

	list(mean(dists))
}


# Evaluate MPyDer on every sampled community, always against the same
# distance matrix.
out <- sapply(X = num, FUN = MPyDer, branch = branch)


####################################################################################################
## Output lists of communities ##

# create categories of MPD
catnum <- 10  # number of categories
minnum <- 100  # minimum number of datapoints per category
nsample <- 100  # communities drawn from each final category


outx <- as.numeric(out)
outx <- as.data.frame(outx)
outx$cat <- as.numeric(cut2(outx[,1], m=minnum))

# Mean MPD of each cut2 bin, looked up for every row. Computing the whole
# column at once avoids creating `catmean` through a partial indexed
# assignment, which errors (or silently recycles) when the first bin does not
# reach the last row.
bin.means <- tapply(outx$outx, outx$cat, mean)
outx$catmean <- as.numeric(bin.means[as.character(outx$cat)])

cat <- seq(min(outx$catmean), max(outx$catmean), length.out=catnum)  # create categories
outx$cat2 <- findInterval(outx$catmean, cat)  # apply final categories

# One row per community: its MPD, its final category (V2) and its members as
# a comma-separated string.
test2 <- data.frame(out = outx$outx,
                    V2 = outx$cat2,
                    num = vapply(num, paste, character(1), collapse = ","),
                    stringsAsFactors = FALSE)


final.sample <- vector("list", catnum)
for (i in seq_len(catnum)) {
	members <- which(test2$V2 == i)
	take <- min(nsample, length(members))
	if (take < nsample) {
		warning("category ", i, " holds only ", length(members),
		        " communities; fewer than ", nsample, " were sampled",
		        call. = FALSE)
	}
	# Index through sample.int(): sample(members, ...) would draw from
	# 1:members whenever a category has exactly one member.
	final.sample[[i]] <- test2[members[sample.int(length(members), take)], ]
}

outmat <- rbindlist(final.sample)

# output for Tallen: a header line followed by the member list, per community
Tallen.out <- matrix("", nrow=(2*nrow(outmat)), ncol=1)

for (i in seq_len(nrow(outmat))) {
	Tallen.out[(2*i-1),1] <- paste0('>', basename, '.', n, '-', i, '_', outmat$out[i])
	Tallen.out[(2*i),1] <- outmat$num[i]
}

write.table(Tallen.out, paste0(basename, ".community.txt"), row.names=FALSE, col.names=FALSE, quote=FALSE)




