## This function performs Empirical Bayes correction of Z-axis associated
## fluorescence decay on any given vector of values, 
## as performed in Saiz *et al* (2016) *Nature Comms* and others subsequently.
## This transformation was devised and written by Venkat Seshan for our work
## in Saiz *et al* (2016) *Nature Comms* and subsequently wrapped into a
## more versatile function by myself for easier application to other analyses.

## The function requires these arguments: 
## * x: the dataframe to work with (a set of multiple embryos)
## * channel: the fluorescence channel to correct (the vector of values),
##   which can be given as an integer (1-5), for convenience if you are using
##   unmodified variable names generated by MINS (in the 'CHn.Avg' format), 
##   or as a string (the full name of the variable/column, in quotations).

## It also requires these three, which have defaults:
## * embryo.var: variable name for embryo identifier (by default 'Embryo_ID')
## * Z.var: variable containing the Z-axis position (by default 'Z')
## * TI.var: variable name for TE vs ICM variable (by default 'TE_ICM')

## The arguments group and logadd are optional and have defaults:
## * group: a grouping variable used to separate embryos (NULL by default)
## * logadd: a value to add to all fluorescence measurements to avoid NAs due 
##   to zero values (by default logadd = 0.0001)

## This function returns the corrected values, in log scale, as a new vector. 
## It will NOT modify the original data.frame. This provides flexibility to 
## correct a single channel and assign the result to a new variable, or to 
## incorporate the function into a loop to correct multiple channels at once.
## For usage examples see some of my analysis code.

ebcor <- function(x, channel, embryo.var = 'Embryo_ID', Z.var = 'Z',
                  TI.var = 'TE_ICM', group = NULL, logadd = 0.0001) {
  # Point at the Z variable
  x$Z <- get(Z.var, x)
  # Check if Z.var contains only numbers
  zz <- as.numeric(x$Z, warning = F)
  if(TRUE %in% unique(is.na(zz))) { 
    stop("Darling, your Z axis has more than just numbers, go check that")
  }
  # get unique embryo IDs in the dataset
  x$Embryo_ID <- get(embryo.var, x)
  embryos <- unique(x$Embryo_ID)
  # if using integers (1 - 5) for channel, convert to standard names
  if (is.numeric(channel) == TRUE) {
    channels <- c('CH1.Avg', 'CH1.Avg', 'CH2.Avg', 'CH3.Avg', 'CH4.Avg', 'CH6.Avg')
    channel <- channels[channel]
  }
  # else, use whatever channel name given
  x$CH.Avg <- get(channel, x)
  
  ## Extract unique values of TI.var in x
  TI.vals <- as.data.frame(unique(x[which(colnames(x) == TI.var)]))
  ## Find which one may correspond to trophectoderm
  te <- TI.vals[, 1][which(TI.vals[, 1] %in% c('TE', 'Te', 'te', 'T', 
                                               'Trophectoderm', 'TB', 
                                               'tb', 'Tb', 
                                               'Trophoblast'))]
  if(length(te) == 0) {
    te <- 'TE'
  }
  ## and point at TE vs ICM variable
  x$TE_ICM <- get(TI.var, x)
  # fitted regression coefficients and their standard errors
  coefs <- matrix(0, length(embryos), 2)
  for(i in 1:length(embryos)) {
    xi <- x[x$Embryo_ID == embryos[i],]
    coefs[i, 1:2] <- summary(lm(log(CH.Avg + logadd) ~ Z + (TE_ICM == te), 
                                data=xi))$coefficients[2, 1:2]
  }
  # if grouping variable is NULL create a dummy vector of 1s
  if (missing(group)) group <- rep(1, nrow(x))
  # group indicator of each embryo
  egrp <- tapply(group, x$Embryo_ID, function(y) {unique(y)[1]})
  # Emperical Bayes correction across the embryos in a group
  ebcoefs <- rep(0, length(embryos))
  for (i in unique(egrp)) {
    ebcoefs[egrp==i] <- mean(coefs[egrp==i, 1]) + 
      (1 - coefs[egrp==i, 2]^2/(coefs[egrp==i, 2]^2 + 
                                  var(coefs[egrp==i, 1])))*(coefs[egrp==i, 1] - 
                                                              mean(coefs[egrp==i, 1]))
  }
  # EB corrected log signal
  CH.ebLogCor <- rep(NA, nrow(x))
  for(i in 1:length(embryos)) {
    ii <- x$Embryo_ID == embryos[i]
    CH.ebLogCor[ii] <- log(logadd + x$CH.Avg[ii]) - ebcoefs[i]*x$Z[ii]
  }
  # return EB corrected log signal
  CH.ebLogCor
}