# analysis 22/06/2022 # Tom Van Dooren tvdooren@gmail.com ############################################################### # read in libraries # libraries used library(ape) # to handle phylogenetic data library(phyr) # correlated ranefs library(lme4) # contains glmer library(phytools) library(MCMCglmm) ################################################################ # read data # tree downloaded from https://10ktrees.nunn-lab.org/Primates/downloadTrees.php primtree<-read.tree("Primates_species.nwk") plot(primtree) data<-read.csv("DataPrimates2022.txt",header=T) # inspection names(data) table(data$cerv7) # seven cervical vertebrae yes (1) or no (0) table(primtree$tip.label) data$species<-gsub(" ","_",data$Species) data$species<-factor(data$species) length(levels(data$species)) levels(data$species)[!levels(data$species)%in%primtree$tip.label] levels(data$species)[!levels(data$species)%in%primtree$tip.label]<-c(NA,"Ateles_fusciceps",NA,NA,NA,NA,"Chlorocebus_sabaeus","Eulemur_albifrons",NA, "Hylobates_muelleri",NA,"Indri_indri","Otolemur_crassicaudatus","Propithecus_verreauxi",NA,"Symphalangus_syndactylus") levels(data$species)%in%primtree$tip.label primtrim<-keep.tip(primtree,levels(data$species)) plot(primtrim) axisPhylo() bardata<-data.frame(normal=table(data$cerv7,data$species)[2,],err=-table(data$cerv7,data$species)[1,]) bardata[,2]<--1*bardata[,2] pdf("treedraft.pdf",useDingbats=F) plotTree.barplot(primtrim,bardata,args.plotTree=list(fsize=0.6),args.barplot=list(beside=TRUE,width=5)) dev.off() bardata<-data.frame(normal=table(data$cerv7,data$species)[2,]) plotTree.barplot(primtrim,bardata,args.plotTree=list(fsize=0.6),args.barplot=list(beside=TRUE,width=5,space=0,xlim=c(0,80))) bardata<-data.frame(normal=table(data$cerv7,data$species)[1,]) plotTree.barplot(primtrim,bardata,args.plotTree=list(fsize=0.6),args.barplot=list(beside=TRUE,width=5,space=0,xlim=c(0,80))) # A. Phylogenetic mixed models ########################### datasub<-subset(data,!is.na(species)&!is.na(cerv7)) datasub$site<-datasub$species datasub$sp<-datasub$species # use correlation matrices for comparability, algorithms will estimate variance multiplier Vphy1 <- vcv(primtrim,corr=T) Vphy2 <- vcv(corMartins(0.01,primtrim,fixed=T),corr=T) Vphy3 <- vcv(corMartins(0.02,primtrim,fixed=T),corr=T) Vphy4 <- vcv(corMartins(0.04,primtrim,fixed=T),corr=T) Vphy5 <- vcv(corMartins(0.06,primtrim,fixed=T),corr=T) Vphy6 <- vcv(corMartins(0.1,primtrim,fixed=T),corr=T) Vphylist<-list(Vphy1,Vphy2,Vphy3,Vphy4,Vphy5,Vphy6) # didn't work, can't deal with repeated observations per species #library(pez) #binaryPGLMM(cerv7 ~ activity, phy=primtrim, data=datasub) # didn't work #re.1 <- list(1, sp = datasub$sp, covar = Vphy) #re.2 <- list(1, site = datasub$site, covar = diag(nspp)) #communityPGLMM.binary(cerv7 ~ 1, data = datasub,sp = datasub$sp,site=datasub$site,random.effects =list(re.1,re.2)) # pglmm uses PQL combined with REML pglmmphy<-list() for(i in 1:length(Vphylist)){ bin1<-pglmm(cerv7 ~ sex+age+deathcapt+activity+(1|sp__), family = "binomial", cov_ranef=list(sp = Vphylist[[i]]), data = datasub) # correlated and independent bin2<-pglmm(cerv7 ~ sex+age+deathcapt+activity+(1|sp), family = "binomial", data = datasub) # independent species re.1 <- list(1, sp = datasub$sp, covar = Vphylist[[i]]) bin3 <- pglmm(cerv7 ~ sex+age+deathcapt+activity, data = datasub, family = "binomial", random.effects = list(re.1)) # correlated species effects only pglmmphy[[i]]<-list(bin1,bin2,bin3)} # inspection, e.g.: pglmmphy[[1]][[1]]$AIC pglmmphy[[1]][[2]]$AIC pglmmphy[[1]][[3]]$AIC pglmmphy[[1]][[1]]$logLik pglmmphy[[1]][[3]]$logLik #optimize alpha parameter minimize -logLik logOU<-function(alpha){ Vphy <- vcv(corMartins(alpha,primtrim,fixed=T),corr=T) re <- list(1, sp = datasub$sp, covar = Vphy) bin <- pglmm(cerv7 ~ sex+age+deathcapt+activity, data = datasub, family = "binomial", random.effects = list(re)) # correlated species effects only return(-bin$logLik) } optOU<-optim(0.1,logOU,method="L-BFGS-B",hessian=T,lower=0.00001) # optimal alpha: 0.5162354? 0.1289325? optOU$logLik optOU$AIC Vphyopt<-vcv(corMartins(optOU$par,primtrim,fixed=T)) re.1 <- list(1, sp = datasub$sp, covar = Vphyopt) binoptREML <- pglmm(cerv7 ~ sex+age+deathcapt+activity, data = datasub, family = "binomial", random.effects = list(re.1)) # correlated species effects only # check, test random effect pglmm_profile_LRT(binoptREML, re.number = 1, cpp = TRUE) # is not standard LRT, divides p-value by two, is a case discussed by Self and Liang 1986 biniREML <- pglmm(cerv7 ~ sex+age+deathcapt+activity+(1|sp), data = datasub, family = "binomial") # independent species effects only # check, test random effect pglmm_profile_LRT(biniREML, re.number = 1, cpp = TRUE) # is not standard LRT, divides p-value by two, is a case discussed by Self and Liang 1986 pchisq(-2*(biniREML$logLik-binoptREML$logLik),1,lower.tail=F) # comparison, assuming we used one parameter more for binoptREML # test fixed effects binoptML <- pglmm(cerv7 ~ sex+age+deathcapt+activity, data = datasub, family = "binomial", random.effects = list(re.1),REML=F) # correlated species effects only binoptML2 <- pglmm(cerv7 ~ sex+age+deathcapt, data = datasub, family = "binomial", random.effects = list(re.1),REML=F) # correlated species effects only -2*(binoptML$logLik-binoptML2$logLik) summary(binoptML) biniML <- pglmm(cerv7 ~ sex+age+deathcapt+activity+(1|sp), data = datasub, family = "binomial", REML=F) # independent species effects only biniML2 <- pglmm(cerv7 ~ sex+age+deathcapt+(1|sp), data = datasub, family = "binomial", REML=F) # independent species effects only -2*(biniML$logLik-biniML2$logLik) summary(biniML) # for comparison: reBM <- list(1, sp = datasub$sp, covar = Vphy1) binML <- pglmm(cerv7 ~ sex+age+deathcapt+activity, data = datasub, family = "binomial", random.effects = list(reBM),REML=F) # correlated species effects only binML2 <- pglmm(cerv7 ~ sex+age+deathcapt, data = datasub, family = "binomial", random.effects = list(reBM),REML=F) # correlated species effects only -2*(binML$logLik-binML2$logLik) # bayesian modelling, comparison ################################ invVphy <- inverseA(remove.zero.brlen(primtrim),nodes="TIPS")$Ainv Vsite<- matrix(0,nrow=nspp,ncol=nspp) diag(Vsite)<-1 rownames(Vsite)<-rownames(Vphy) colnames(Vsite)<-colnames(Vphy) invVsite <- Matrix(solve(Vsite),sparse=T) # inversion is useless...just done like this to show a symmetry in the approach Vphylist<-list(Vphy1,Vphy2,Vphy3,Vphy4,Vphy5,Vphyopt) invVphy<-Matrix(solve(Vphy1),sparse=T) invVphyopt<-Matrix(solve(Vphyopt),sparse=T) prior <- list(R=list(V=0.012, nu=2), G=list(G1=list(V=1.41e-05,nu=100),G2=list(V=1.41e-05,nu=100))) mcmc1<-MCMCglmm(cerv7 ~ sex+age+deathcapt+activity, random=~species+site, ginvers=list(species=invVphy,site=invVsite), data=datasub, slice=TRUE, family="categorical",prior=prior, verbose=FALSE,,nitt=250000,burnin=50000,thin=500) mcmc2<-MCMCglmm(cerv7 ~ sex+age+deathcapt+activity, random=~species, data=datasub, slice=TRUE, family="categorical", verbose=FALSE,nitt=250000,burnin=50000,thin=500) prior <- list(R=list(V=0.012, nu=2), G=list(G1=list(V=1.41e-05,nu=100))) mcmc3<-MCMCglmm(cerv7 ~ sex+age+deathcapt+activity, random=~species, ginvers=list(species=invVphyopt), data=datasub, slice=TRUE, family="categorical", prior=prior,verbose=FALSE,nitt=250000,burnin=50000,thin=500) # glmer modelling, no correlations, different approach to optimization (Laplace approximation) glmer1<-glmer(cerv7 ~ sex+age+deathcapt+activity+(1|species), family="binomial",data=datasub) # help page states that this is an ML fit summary(glmer1) drop1(glmer1,test="C") confint(glmer1) # A. binomial and quasibinomial glm # ###################################### # we are not modelling the individual probability of a cervical anomaly, but interested in the species effects. # there are only categorical explanatory variables # the data are clustered, observations are sharing combinations of explanatory variables # http%3A%2F%2Fhighstat.com%2FBooks%2FBGS%2FGLMGLMM%2Fpdfs%2FHILBE-Can_binary_logistic_models_be_overdispersed2Jul2013.pdf glmF2QB<-glm(cerv7~Family,data=data,family=quasibinomial,subset=Family=="Lorisidae"|Family=="Galagidae") summary(glmF2QB) # really wide s.e. for family effects, extreme estimates (close to zero probability in one group) drop1(glmF2QB,test="F") glmF2B<-glm(cerv7~Family,data=data,family=binomial,subset=Family=="Lorisidae"|Family=="Galagidae") summary(glmF2B) # drop1(glmF2B,test="C") fisher.test(matrix(c(0,88,32,50),nrow=2,byrow=T)) glmF3B<-glm(cerv7~Family=="Lorisidae",data=data,family=binomial,subset=Family=="Lorisidae"|Family=="Lemuridae"|Family=="Indridae") summary(glmF3B) # drop1(glmF3B,test="C") confint(glmF3B)