##########################################
##### Multivariate Ratio Analysis (MRA)
##### LDA Ratio Extractor, v1.03 (for 2 and more groups)
##### Baur H., Leuenberger C. (2011) Systematic Biology 60, p. 816-818
##########################################



##################### Auxiliary functions #######################

### Function scatterhull used to plot the result from the LDA ratio extractor.
### Gives a scatterplot with convex hulls around specified groups.

scatterhull <- function(xvar, yvar, factor, col=c(1:nlevels(factor))[factor], 
                        lwd=1.5, pch=c(1:nlevels(factor))[factor], cex=0.9, xlab=deparse(substitute(xvar)), 
                        ylab=deparse(substitute(yvar)), main=NA, placelegend="bottomright", 
                        legend=levels(factor), pchlegend=c(1:nlevels(factor)), collegend=c(1:nlevels(factor)), cexlegend=0.8, border="gray70",...){
  
  plot(xvar, yvar, xlab=xlab, ylab=ylab, main=main, cex.main=0.9, col=col, pch=pch, cex=cex, ...)
  dataset <- data.frame(xvar, yvar, factor)
  for (i in 1:nlevels(factor)){
    dataset.1 <- dataset[as.numeric(factor)==i,]
    hull <- chull(dataset.1$xvar, dataset.1$yvar)
    polygon(dataset.1$xvar[hull], dataset.1$yvar[hull], border=border, lwd=lwd)
  }
  legend(placelegend, bty="n", legend, pch=pchlegend, col=collegend, cex=cexlegend)
}




normalize.data <- function(U){
  X <- as.matrix(scale(log(U),center=TRUE,scale=FALSE))
}


get.standard.distance <- function(x,y){
  x <- as.vector(x)
  y <- as.vector(y)
  D <- abs(mean(x)-mean(y))/(sd(c(scale(x,center=TRUE,scale=FALSE),c(scale(y,center=TRUE,scale=FALSE)))))
  return(D)
}


get.pseudo.inverse <- function(Sigma.k,k){
  
  p <- dim(Sigma.k)[1]
  
  ev <- eigen(Sigma.k)$values[1:(p-k)]
  V <- eigen(Sigma.k)$vectors
  
  Lambda.plus <- diag(c(1/ev,rep(0,k))) 
  Sigma.plus <- V%*%Lambda.plus%*%t(V)
  
  return(Sigma.plus)
}


get.Sigma <- function(X, g){
  
  g.names <- levels(g)
  p <- dim(X)[2]
  X.pooled <- matrix(nrow= 0, ncol=p)
  
  for (g.name in g.names){
    X.k <- X[g==g.name,1:p]
    
    if (is.vector(X.k)==FALSE){
      X.pooled <- rbind(X.pooled,scale(X.k,center=TRUE,scale=FALSE))
    }
  }
  Sigma <- cov(X.pooled)
  return(Sigma)
}


get.B <- function(X, g){
  
  g.names <- levels(g)
  m <- colMeans(X)
  
  B <- matrix(0,nrow=dim(X)[2],ncol=dim(X)[2])
  for (g.name in g.names){
    X.k <- as.matrix(X[g==g.name,])
    if (is.vector(X.k)==FALSE){
      n.k <- 1
      m.k <- colMeans(X.k)
    }
    else{
      n.k <- 1
      m.k <- X.k
    }
    
    B <- B + n.k*(m.k-m)%*%t(m.k-m)
  }
  return(B)
}


get.b.ij <- function(p){
  
  counter <- 1
  B.ij <- matrix(0,nrow=p,ncol=p*(p-1)/2)
  
  for (i in c(1:(p-1))){  
    for (j in c((1+i):p)){
      B.ij[i,counter] <- 1
      B.ij[j,counter] <- -1
      counter <- counter+1
    }
  }
  return(B.ij)
}

get.c <- function(Sigma,b,w){
  return((t(b)%*%Sigma%*%w)^2/(t(b)%*%Sigma%*%b))
}


get.mean.difference <- function(X, g, g1, g2){
  
  g.names <- levels(g)
  
  m2 <- colMeans(as.matrix(X[g==g.names[g2],]))
  m1 <- colMeans(as.matrix(X[g==g.names[g1],]))
  
  return(m2-m1)
}






##################### Main Function #####################


#### Arguments
# U             a numeric matrix (or data frame)
# g             a grouping variable with 2 or more levels
# k.max         maximum number of ratio to be extracted. Default value is kmax=2
# rx, ry        ratios to be plotted
# g1, g2        groups for which D.ij etc. are evalutated
# col, pch ...  Various obtions for adjusting the scatterplots (see 2. tutorial on LDA Ratio Extractor)

ldaRE <- function(U, g, k.max=4, rx=1, ry=2, g1=1, g2=2, col=c(1:nlevels(g))[g], pch=c(1:nlevels(g))[g], placelegend="bottomright", collegend=c(1:nlevels(g)), pchlegend=c(1:nlevels(g)), cexlegend=1, cex=1) {
  
  g <- as.factor(g)
  g.names <- levels(g)
  nb.g <- length(g.names)
  
  X <- normalize.data(U)
  
  p <- dim(X)[2]
  trait.names <- colnames(X)
  
  X1 <- X[g==g.names[g1],]
  X2 <- X[g==g.names[g2],]
  
  
  Sigma <- get.Sigma(X,g)
  B <- get.B(X,g)
  
  
  w <- solve(Sigma)%*%get.mean.difference(X, g, g1, g2)
  D.tot <- get.standard.distance(X1%*%w,X2%*%w)
  
  D <- matrix(0,nrow=k.max,ncol=4)
  
  
  I <- diag(1,p,p)
  a0 <- as.vector(rep(1,p)/p)
  M.k <- matrix(a0,ncol=1)
  
  D.size <- get.standard.distance(X1%*%a0,X2%*%a0)/D.tot
  print(paste("D.size for ", as.character(g.names[g1]), " and ", as.character(g.names[g2]),": ", as.character(round(D.size, 3)), sep=""))
  
  best.ratio <- matrix(0,nrow=k.max,ncol=2)
  
  B.ij <- get.b.ij(p)
  
  for (k in c(1:k.max)){
    
    P.k <- I-M.k%*%solve(t(M.k)%*%M.k)%*%t(M.k)
    
    Sigma.k <- P.k%*%Sigma%*%P.k
    
    Sigma.k.plus <- get.pseudo.inverse(Sigma.k,k)
    
    B.k <- P.k%*%B%*%P.k
    
    Sig.plus.B <- Sigma.k.plus%*%B.k
    Sig.plus.B <- 0.5*(Sig.plus.B+t(Sig.plus.B))
    
    w.k <- Re(eigen(Sig.plus.B)$vectors[,1])
    
    if (nb.g==2){
      m12 <- get.mean.difference(X,g,1,2)
      w.k <- Sigma.k.plus%*%m12
    }
    
    c.max <- 0
    for (ij in c(1:dim(B.ij)[2])){
      
      b.ij <- B.ij[,ij]
      c.ij <- get.c(Sigma,b.ij,w.k)
      
      if (c.ij>c.max){
        b.k <- b.ij 
        c.max <- c.ij
      }    
    }
    
    best.ratio[k,1] <- which(b.k==1)
    best.ratio[k,2] <- which(b.k==-1)
    
    print(paste("*********** round " , as.character(k), " ***********",sep=""))
    print(paste("Ratio no. ", as.character(k), ": ", trait.names[best.ratio[k,1]], "/", trait.names[best.ratio[k,2]], sep=""))
    M.k <- cbind(M.k,Sigma%*%b.k)
    
    D[k,1] <- k
    D[k,2] <- get.standard.distance(X1%*%w.k,X2%*%w.k) # D.bij
    D[k,3] <- get.standard.distance(X1%*%w.k,X2%*%w.k)/D.tot # D.shape
    D[k,4] <- D.size/(D.size+D[k,3]) # delta
    
    print(paste("Distances for ", as.character(g.names[g1]), " and ", as.character(g.names[g2]),":", sep=""))
    print(paste("D.bij = ", as.character(round(D[k,2], 3)), sep=""))
    print(paste("D.shape = ", as.character(round(D[k,3], 3)), sep=""))
    print(paste("delta = ", as.character(round(D[k,4], 3)), sep=""))
    
  }
  
  ratio <- U[,best.ratio[,1]]/U[,best.ratio[,2]]
  colnames(ratio) <- paste(trait.names[best.ratio[,1]], "/", trait.names[best.ratio[,2]], sep="")
  
  ### plotting 2 ratios
  main <- paste("Ratio no. ", as.character(rx), " and ratio no. ", as.character(ry), sep="")
  scatterhull(ratio[,rx], ratio[,ry], g, xlab=paste(trait.names[best.ratio[rx,1]],"/",trait.names[best.ratio[rx,2]]), 
              ylab=paste(trait.names[best.ratio[ry,1]],"/",trait.names[best.ratio[ry,2]]), main=main, col=col, pch=pch, placelegend=placelegend, collegend=collegend, pchlegend=pchlegend, cexlegend=cexlegend, cex=cex)
  
  return(list(ratios=ratio, D.size=D.size, Distances=D))
}



