library(Hmsc)
library(gllvm)

# Load the pre-processed sequencing data.
load("data_meta_otu.Rdata")

# THE LOADED FILE CONTAINS THE FOLLOWING OBJECTS
#     meta,
#     otu.table.plausible,
#     otu.table.reliable,
#     read.counts.plausible,
#     read.counts.reliable,
#     taxonomy.plausible,
#     taxonomy.reliable

# The analysis below uses the "plausible" variants of the OTU table,
# taxonomy and read counts.
otu.table = otu.table.plausible
taxonomy = taxonomy.plausible
read.counts = read.counts.plausible

# Keep only the samples with at least 10000 reads in `filt_nread`; the same
# row mask is applied to the OTU table, the metadata and the read counts so
# that their rows stay aligned.
sel = read.counts$filt_nread>=10000
otu.table = otu.table[sel,]
meta = meta[sel,]
read.counts = read.counts[sel,]

# Spike reads are the difference between the `nochim2` and `nospike` read
# counts; `dna_amount` is the ratio of non-spike to spike reads.
read.counts$spike_nread = read.counts$nochim2_nread-read.counts$nospike_nread
meta$dna_amount = read.counts$nospike_nread/read.counts$spike_nread

# Convert each sample (row) to relative abundances. Rows that sum to zero are
# left untouched to avoid dividing by zero. A vector of length nrow is recycled
# down the columns, so every row is divided by its own total.
row.totals = rowSums(otu.table)
nonzero = row.totals>0
if(any(nonzero)){
  otu.table[nonzero,] = otu.table[nonzero,]/row.totals[nonzero]
}

# Inoculated focal species (names) paired with the alias under which each one
# is looked up in `taxonomy$species` (values). Keeping the pairs side by side
# guarantees that `inocs[k]` and `spp[k]` always refer to the same species.
inoc.alias = c(
  "Antrodia_piceata"       = "Antrodia_piceata_813073",
  "Antrodiella_citrinella" = "Flaviporus_citrinellus_106139",
  "Fomitopsis_rosea"       = "Rhodofomes_roseus_127496",
  "Perenniporia_subacida"  = "Perenniporia_subacida_335816",
  "Physisporinus_crocatus" = "Rigidoporus_crocatus_107233",
  "Postia_guttulata"       = "Postia_guttulata_110917",
  "Skeletocutis_odora"     = "Skeletocutis_odora_106476",
  "Skeletocutis_stellae"   = "Skeletocutis_stellae_323595",
  "Steccherinum_collabens" = "Junghuhnia_collabens_315979"
)

# Plain (unnamed) character vectors used by the rest of the script.
inocs = names(inoc.alias)
spp = unname(inoc.alias)

# Number of focal species.
ni = length(inocs)

# For every focal (inoculated) species, assemble the response and predictors
# from the logs inoculated with that species and define ten alternative
# probit Hmsc models that differ only in their fixed-effect predictors. The
# unfitted model objects are collected in `allmodels` (one named list per
# focal species) and saved to "allModels.RData".
allmodels = list()
for(iii in seq_len(ni)){
  inoc = inocs[iii]
  print("focal species:")
  print(inoc)
  # Position of the focal species in `taxonomy`; the same index is used below
  # as a column index into `otu.table`.
  f.sp = which(taxonomy$species==spp[iii])
  print("alias")
  print(as.character(taxonomy$species[f.sp]))
  # The alias must identify exactly one taxon, otherwise the scalar tests on
  # `otu.table[...,f.sp]` below are ill-defined.
  if(length(f.sp)!=1){
    stop("expected exactly one taxonomy entry for ", spp[iii],
         ", found ", length(f.sp))
  }
  # Logs that were inoculated with the focal species.
  logs = unique(meta[meta$InocSpecies==inoc,]$RunningLogID)
  nl = length(logs)
  print(nl)
  
  # Per-log accumulators, one entry (or row) per log that passes the filter.
  y = NULL
  LT = NULL
  DC = NULL
  siteCode = NULL
  dna_amount = NULL
  seq.depth = c()
  RC = c()
  
  
  for(i in seq_len(nl)){
    f.log = logs[i]
    # All samples of this log, split into the 2019 sample and the samples
    # from every other sampling year.
    sel = which(meta$RunningLogID==f.log)
    sel.2019 = sel[which(meta[sel,]$SamplingYear=="2019")]
    sel.after = setdiff(sel,sel.2019)
    # The bookkeeping below adds one row to RC but one element to y per log,
    # so more than one 2019 sample would misalign response and predictors.
    if(length(sel.2019)>1){
      stop("log ", f.log, " has ", length(sel.2019),
           " samples from 2019; expected at most one")
    }
    # Use the log only if it has a 2019 sample in which the focal species
    # was absent and at least one sample from another year.
    if(length(sel.2019)==1 && length(sel.after)>0 && otu.table[sel.2019,f.sp]==0){
      # Predictors: the 2019 community and DNA amount.
      RC = rbind(RC,otu.table[sel.2019,])
      dna_amount = c(dna_amount,meta$dna_amount[sel.2019])
      # Mean log sequencing depth of the non-2019 samples.
      seq.depth = c(seq.depth,mean(log(read.counts$filt_nread[sel.after])))
      # Response: 1 if the focal species occurs in any non-2019 sample.
      y = c(y,1*(sum(otu.table[sel.after,f.sp])>0))
      # Log-level covariates, taken from the first sample of the log.
      LT = c(LT,as.character(meta[sel,]$LogType[1]))
      DC = c(DC,meta[sel,]$DecayStage2019[1])
      siteCode = c(siteCode,as.character(meta[sel,]$SiteCode[1]))
    }
  }
  
  # Without any eligible log no model can be defined; fail with a clear
  # message instead of an obscure downstream error.
  if(length(y)==0){
    stop("no eligible logs found for focal species ", inoc)
  }
  
  LT = as.factor(LT)
  siteCode = as.factor(siteCode)
  # Site is the only random level, shared by all models below.
  studyDesign = data.frame(siteCode)
  rL.site = HmscRandomLevel(units = levels(studyDesign$siteCode))
  
  # Presence-absence and fourth-root transformed versions of the 2019
  # community data.
  RC.pa = 1*(RC>0)
  RC.abu = sqrt(sqrt(RC))
  
  # Environment-only
  m.null = Hmsc(Y=as.matrix(y),
                studyDesign = studyDesign,
                XData = data.frame(LT,DC,seq.depth),
                XFormula = ~LT+DC+seq.depth,
                ranLevels = list(siteCode = rL.site),
                distr="probit")
  
  # DNA amount
  m.dna = Hmsc(Y=as.matrix(y),
               studyDesign = studyDesign,
               XData = data.frame(LT,DC,dna_amount = log(dna_amount),seq.depth),
               XFormula = ~dna_amount + LT+DC+seq.depth,
               ranLevels = list(siteCode = rL.site),
               distr="probit")
  
  # Total species richness
  S = rowSums(RC.pa)
  m.S = Hmsc(Y=as.matrix(y),
             studyDesign = studyDesign,
             XData = data.frame(LT,DC,S = S,seq.depth),
             XFormula = ~S + LT+DC+seq.depth,
             ranLevels = list(siteCode = rL.site),
             distr="probit")
  
  # Per-phylum species richness
  S.basio = rowSums(RC.pa[,taxonomy$phylum=="Basidiomycota"])
  S.asco = rowSums(RC.pa[,taxonomy$phylum=="Ascomycota"])
  m.ba = Hmsc(Y=as.matrix(y),
              studyDesign = studyDesign,
              XData = data.frame(S.basio,S.asco,LT,DC,seq.depth),
              XFormula = ~S.basio + S.asco + LT + DC + seq.depth,
              ranLevels = list(siteCode = rL.site),
              distr="probit")
  
  # Community composition PA: the first two gllvm latent variables, fitted to
  # the species present in more than 9 of the retained samples, are scaled and
  # used as predictors.
  cm = as.matrix(RC.pa)
  cm = cm[,colSums(cm)>9]
  my.gllvm = gllvm(cm, family = "binomial", link = "probit")
  lv1=as.numeric(scale(my.gllvm$lvs[,1]))
  lv2=as.numeric(scale(my.gllvm$lvs[,2]))
  X.LV.pa = cbind(lv1,lv2)
  
  m.lv.pa = Hmsc(Y=as.matrix(y),
                 studyDesign = studyDesign,
                 XData = data.frame(X.LV.pa,LT,DC,seq.depth),
                 XFormula = ~lv1 + lv2 + LT + DC + seq.depth,
                 ranLevels = list(siteCode = rL.site),
                 distr="probit")
  
  # Community composition RRA: same procedure on the transformed abundances.
  # NOTE(review): a binomial/probit gllvm is fitted here to continuous
  # (fourth-root relative abundance) data -- confirm this is intended.
  cm = as.matrix(RC.abu)
  cm = cm[,colSums(RC.abu>0)>9]
  my.gllvm = gllvm(cm, family = "binomial", link = "probit")
  lv1=as.numeric(scale(my.gllvm$lvs[,1]))
  lv2=as.numeric(scale(my.gllvm$lvs[,2]))
  X.LV.abu = cbind(lv1,lv2)
  
  m.lv.abu = Hmsc(Y=as.matrix(y),
                  studyDesign = studyDesign,
                  XData = data.frame(X.LV.abu,LT,DC,seq.depth),
                  XFormula = ~lv1 + lv2 + LT + DC + seq.depth,
                  ranLevels = list(siteCode = rL.site),
                  distr="probit")
  
  # Ten most common resident species (PA & RRA), ranked by their prevalence
  # in the retained 2019 samples.
  sel = order(colMeans(RC.pa),decreasing = TRUE)[1:10]
  X.pa.10 = RC.pa[,sel]
  X.abu.10 = RC.abu[,sel]
  
  m.pa.10 = Hmsc(Y=as.matrix(y),
                 studyDesign = studyDesign,
                 XData = data.frame(X.pa.10,LT,DC,seq.depth),
                 XFormula = ~.,
                 ranLevels = list(siteCode = rL.site),
                 distr="probit")
  
  m.abu.10 = Hmsc(Y=as.matrix(y),
                  studyDesign = studyDesign,
                  XData = data.frame(X.abu.10,LT,DC,seq.depth),
                  XFormula = ~.,
                  ranLevels = list(siteCode = rL.site),
                  distr="probit")
  
  # Ten most common species with variable selection (PA & RRA).
  # One selection group per resident species: covGroup = k+1 because the ten
  # species columns come first in XData and column 1 of the design matrix is
  # the intercept. See the Hmsc documentation of `XSelect` for the meaning of
  # `spGroup` and `q`.
  XSelect = list()
  for (k in 1:10){
    XSelect[[k]] = list(covGroup = k+1,
                        spGroup = 1,
                        q = 0.1)
  }
  m.sel.pa.10 = Hmsc(Y=as.matrix(y),
                     studyDesign = studyDesign,
                     XData = data.frame(X.pa.10,LT,DC,seq.depth),
                     XFormula = ~.,
                     XSelect = XSelect,
                     ranLevels = list(siteCode = rL.site),
                     distr="probit")
  
  m.sel.abu.10 = Hmsc(Y=as.matrix(y),
                      studyDesign = studyDesign,
                      XData = data.frame(X.abu.10,LT,DC,seq.depth),
                      XFormula = ~.,
                      XSelect = XSelect,
                      ranLevels = list(siteCode = rL.site),
                      distr="probit")
  
  # All candidate models for this focal species.
  models = list(null=m.null,
                dna=m.dna,
                S=m.S,
                ba=m.ba,
                lv.pa=m.lv.pa,
                lv.abu=m.lv.abu,
                pa.10=m.pa.10,
                abu.10=m.abu.10,
                sel.pa.10=m.sel.pa.10,
                sel.abu.10=m.sel.abu.10)
  
  allmodels[[iii]] = models
}
# Name the per-species model lists after the inoculated species and store
# the unfitted models for the subsequent fitting step.
names(allmodels) = inocs
save(allmodels,file = "allModels.RData")