lm1 <- lm(mean.vir ~ SpeciesP, data = tmp2)
Anova(lm1) # no difference in rostrata versus infectans
lm1$coefficients
summary(lm1)
confint(lm1)
# try arcsin transform to make the distrib more normal -> does not change anything
tmp2$amean.vir <- asin(tmp2$mean.vir)
lm12 <- lm(amean.vir ~ SpeciesP, data = tmp2)
Anova(lm12)
###  ------------------------------------------------------------------------------------------  ###
####                      CORRELATION BETWEEN SPECIES AND LOCATIONS                             ####
###  ------------------------------------------------------------------------------------------  ###
#### CORRELATION OF RESISTANCE TO INFECTANS AND RESISTANCE TO ROSTRATA FOR THE HOST ####
all.sum <- ddply(all[all$OrigineP=="PZ",], .(SoucheH, SpeciesP), summarise, OrigineH = unique(OrigineH),
vir = mean(Virulence, na.rm = T)) # use only the PZ parasites
all.sum.ros <- all.sum[which(all.sum$SpeciesP == "rostrata"),]
all.sum.inf <- all.sum[which(all.sum$SpeciesP == "infectans"),]
dim(all.sum.ros)
dim(all.sum.inf)
stopifnot(all(all.sum.ros$SoucheH == all.sum.inf$SoucheH))
stopifnot(all(all.sum.ros$OrigineH == all.sum.inf$OrigineH))
plot(all.sum.ros$vir, all.sum.inf$vir, xlim = c(0,1), ylim = c(0,1))
abline(0,1)
summary(lm(all.sum.ros$vir ~ all.sum.inf$vir))
plot(all.sum.ros$vir[all.sum.ros$OrigineH == "RC"],
all.sum.inf$vir[all.sum.ros$OrigineH == "RC"]
, xlim = c(0,1), ylim = c(0,1))
abline(0,1)
summary(lm(all.sum.ros$vir[all.sum.ros$OrigineH == "RC"]
~ all.sum.inf$vir[all.sum.inf$OrigineH == "RC"]))
# infectans versus rostrata with Penze only hosts
pdf("~/Dropbox (Personal)/Laure Guillou data/correlation.res.inf.ros.pdf", width = 6, height = 6)
layout(matrix(c(1, 2, 3, 4, 5, 3, 6, 6, 7), nrow = 3, ncol = 3, byrow = T), widths = c(1,1,2), heights = c(1,1,2))
par(mar=c(2, 1, 2, 1)) #  bottom, left, top and right margins
plot(1, type="n", axes=F, xlab="", ylab="", xlim = c(0,1), ylim = c(0,1))
text(0.1, 0.9, "A", cex = 4)
plot.mat(m3, expression(paste(italic("P. rostrata"), " in Penzé")))
# HERE ADJUST SIZE OF POINTS FOR MULTIPLICITY
df1 <- data.frame(V1 = 1-all.sum.ros$vir[all.sum.ros$OrigineH == "PZ"], V2 = 1-all.sum.inf$vir[all.sum.ros$OrigineH == "PZ"])
df1 <- ddply(df1, .(V1, V2), summarise, count = length(V2))
par(mar=c(4, 4, 2.1, 1))  # mar=c(5.1,4.1,4.1,2.1) bottom, left, top and right
plot(df1$V1, df1$V2, xlim = c(0,1), ylim = c(0,1),
xlab = expression(paste("Host resistance to ", italic("P. rostrata"))), las = 1,
ylab = expression(paste("Host resistance to ", italic("P. infectans"))), pch = 1, cex = df1$count)
abline(0,1, lty = "dashed")
mtext("B", side = 3, at = 0, cex = 2)
res.ros <- 1 - all.sum.ros$vir[all.sum.ros$OrigineH == "PZ"]
res.inf <- 1 - all.sum.inf$vir[all.sum.inf$OrigineH == "PZ"]
lm.corr1 <- lm(res.inf ~ res.ros)
beta <- lm.corr1$coefficients[2]
intercept <- lm.corr1$coefficients[1]
pvalue <- anova(lm.corr1)["Pr(>F)"][1,1]
abline(intercept, beta)
#text(0.6, 0.2, labels = paste("beta=", round(beta,2), "\np =", round(pvalue,2)), cex =1.3)
par(mar=c(1, 1, 2, 1))
plot.mat(m1, expression(paste(italic("P. infectans"), " in Rance")))
plot.mat(m2, expression(paste(italic("P. infectans"), " in Penzé")))
#### CORRELATION BETWEEN RESISTANCE TO INFECTANS OF PZ AND OF RC ####
# (WEAKLY CORRELATED, MORE SO FOR RC HOSTS)
# sort out mean infectivity of each host strain by parasite origin
all.sum2 <- ddply(all[all$SpeciesP=="infectans",], .(SoucheH, OrigineP), summarise, OrigineH = unique(OrigineH),
vir = mean(Virulence, na.rm = T))
all.sum2.RC <- all.sum2[which(all.sum2$OrigineP == "RC"),]
all.sum2.PZ <- all.sum2[which(all.sum2$OrigineP == "PZ"),]
stopifnot(all(all.sum2.RC$SoucheH == all.sum2.PZ$SoucheH)) # in these two tables the host strains are the same
# all host strains together:
# HERE ADJUST SIZE OF POINTS FOR MULTIPLICITY
par(mar=c(4, 4, 2, 1))
df1 <- data.frame(V1 = 1 - all.sum2.RC$vir[all.sum2.RC$OrigineH == "PZ"], V2 = 1 - all.sum2.PZ$vir[all.sum2.PZ$OrigineH == "PZ"])
df1 <- ddply(df1, .(V1, V2), summarise, count = length(V2))
plot(df1$V1, df1$V2, xlim = c(0,1), ylim = c(0,1), las = 1,
xlab = expression(paste("Host resistance to Rance ", italic("P. infectans"))),
ylab = expression(paste("Host resistance to Penzé ", italic("P. infectans"))), pch = 1, cex=df1$count
)
df2 <- data.frame(V1 = 1-all.sum2.RC$vir[all.sum2.RC$OrigineH == "RC"], V2 = 1-all.sum2.PZ$vir[all.sum2.PZ$OrigineH == "RC"])
df2 <- ddply(df2, .(V1, V2), summarise, count = length(V2))
points(df2$V1, df2$V2, pch = 16, cex = df2$count)
legend(0., 1, legend = c("Host from Penzé", "Host from Rance"), pch = c(1, 16))
abline(0,1, lty = "dashed")
mtext("C", side = 3, at = 0, cex = 2)
res.RC <- 1-all.sum2.RC$vir
res.PZ <- 1-all.sum2.PZ$vir
lm.corr2 <- lm(res.PZ ~ res.RC)
beta <- lm.corr2$coefficients[2]
intercept <- lm.corr2$coefficients[1]
pvalue <- anova(lm.corr2)["Pr(>F)"][1,1]
abline(intercept, beta)
summary(lm(all.sum2.RC$vir ~ all.sum2.PZ$vir))
summary(lm(all.sum2.RC$vir ~ all.sum2.PZ$vir))$r.squared
#### CORRELATION FOR INFECTANS OF INFECTIVITY TO PZ AND RESISTANCE TO RC HOSTS ####
all.sum3 <- ddply(all[all$SpeciesP=="infectans",], .(SoucheP, OrigineH), summarise, OrigineP = unique(OrigineP),
vir = mean(Virulence, na.rm = T))
all.sum3.RC <- all.sum3[which(all.sum3$OrigineH == "RC"),]
all.sum3.PZ <- all.sum3[which(all.sum3$OrigineH == "PZ"),]
dim(all.sum3.RC)
dim(all.sum3.PZ)
stopifnot(all(all.sum3.RC$SoucheP == all.sum3.PZ$SoucheP)) # check same strains exactly
# all strains together:
para.PZ <- which(all.sum3.RC$OrigineP == "PZ") # strains from PZ
para.RC <- which(all.sum3.RC$OrigineP == "RC")
par(mar=c(4, 4, 2, 1))
plot(all.sum3.RC$vir[para.PZ], all.sum3.PZ$vir[para.PZ], xlim = c(0,1), ylim = c(0,1), las = 1,
pch = 0, col = "black", xlab = "Parasite infectivity on Rance host", ylab = "Parasite infectivity on Penzé host")
points(all.sum3.RC$vir[para.RC], all.sum3.PZ$vir[para.RC], col = "black", pch = 15)
abline(0,1, lty = "dashed")
mtext("D", side = 3, at = 0, cex = 2)
summary(lm(all.sum3.RC$vir ~ all.sum3.PZ$vir)) # very high correlation
summary(lm(all.sum3.RC$vir ~ all.sum3.PZ$vir))$r.squared
lm.corr3 <- lm(all.sum3.RC$vir ~ all.sum3.PZ$vir)
beta <- lm.corr3$coefficients[2]
intercept <- lm.corr3$coefficients[1]
abline(intercept, beta)
#### CORRELATION FOR ROSTRATA OF INFECTIVITY TO PZ AND RESISTANCE TO RC HOSTS ####
all.sum4 <- ddply(all[all$SpeciesP=="rostrata",], .(SoucheP, OrigineH), summarise, OrigineP = unique(OrigineP),
vir = mean(Virulence, na.rm = T))
all.sum4.RC <- all.sum4[which(all.sum4$OrigineH == "RC"),]
all.sum4.PZ <- all.sum4[which(all.sum4$OrigineH == "PZ"),]
dim(all.sum4.RC)
dim(all.sum4.PZ)
stopifnot(all(all.sum4.RC$SoucheP == all.sum4.PZ$SoucheP)) # check same strains exactly
# all strains together: (all parasite come from PZ anyway)
points(all.sum4.PZ$vir, all.sum4.RC$vir, xlim = c(0,1), ylim = c(0,1),
pch = 3)
legend(0., 1, legend = c(expression(paste(italic("P. infectans"),  " from Penzé")), expression(paste(italic("P. infectans"),  " from Rance")), expression(paste(italic("P. rostrata")))), pch = c(0, 15, 3))
# abline(0,1)
dev.off() #### END OF THE GRAPH
rm(list = ls())
library(plyr)
library(lme4)
library(bipartite)
library(car)
all <- read.csv("~/Dropbox/Laure Guillou data/all.csv", sep = ",", header = T)
###  ----------------------------------------------------------------  ###
####                      NESTEDNESS ANALYSIS                         ####
###  ----------------------------------------------------------------  ###
create_mat <- function(set){
sH <- names(table(all[set,]$SoucheH))
sP <- names(table(all[set,]$SoucheP)[which(table(all[set,]$SoucheP) != 0)])
# reorder sH and sP by average infectivity
sortH <- tapply(all[set,]$Virulence, all[set,]$SoucheH, mean, na.rm  = T)
sortP <- tapply(all[set,]$Virulence, all[set,]$SoucheP, mean, na.rm = T)
sortH <- sort.int(sortH, index.return = T, decreasing = T)$ix
sortP <- sort.int(sortP, index.return = T, decreasing = T)$ix
sH <- sH[sortH]
sP <- sP[sortP]
mat_inf <- matrix(0,nrow = length(sP), ncol = length(sH))
for(ii in 1:length(sP)){
for(jj in 1:length(sH)){
this_idx <- which(all[set,]$SoucheH == sH[jj] & all[set,]$SoucheP == sP[ii])
stopifnot(length(this_idx) < 2)
if(length(this_idx) > 0){
mat_inf[ii,jj] = all[set,]$Virulence[this_idx]
} else {
mat_inf[ii,jj] = NA
}
}
}
return(mat_inf)
}
replace.NA <- function(mat){ # replace NA with random infectivity in proportion to average infectivity
average.inf <- mean(mat, na.rm = T)
na.idx <- which(is.na(mat), arr.ind = T)
for(ii in 1:nrow(na.idx)){
mat[na.idx[ii,1], na.idx[ii,2]] <- rbinom(1, 1, prob = average.inf)
}
return(mat)
}
which(all$SpeciesP == "infectans" & (all$OrigineH == "RC" & all$OrigineP == "RC")) -> mi1
which(all$SpeciesP == "infectans" & (all$OrigineH == "PZ" & all$OrigineP == "PZ")) -> mi2
which(all$SpeciesP == "rostrata" & (all$OrigineH == "PZ" & all$OrigineP == "PZ")) -> mi3
create_mat(mi1) -> mat1
create_mat(mi2) -> mat2
create_mat(mi3) -> mat3
# average infectivity
average.inf1 <- mean(mat1, na.rm = T)
average.inf2 <- mean(mat2, na.rm = T)
average.inf3 <- mean(mat3, na.rm = T)
# replace NA values with random 0 or 1 in proportion of average infectivity
mat1 <- replace.NA(mat1) # 7*43 matrix
mat2 <- replace.NA(mat2) # 8*72 matrix
mat3 <- replace.NA(mat3) # 22*72 matrix
non_empty_cols1 <- which(apply(mat1, 2, sum) != 0) # only this guy has empty cols (hosts fully resistant to parasites)
non_empty_cols2 <- which(apply(mat2, 2, sum) != 0)
non_empty_cols3 <- which(apply(mat3, 2, sum) != 0)
mat1.2 <- mat1[, non_empty_cols1] # 7*33 matrix now
mat2.2 <- mat2[, non_empty_cols2] # 8*72 matrix
mat3.2 <- mat3[, non_empty_cols3] # 22*72 matrix
n1 <- nestedness(mat1.2, n.nulls = 100) # was 1000 for files we saved
n2 <- nestedness(mat2.2, n.nulls = 100)
n3 <- nestedness(mat3.2, n.nulls = 100)
#### save nestedness estimators in file ####
df <- data.frame(name = c("P.infectans PZxPZ", "P.infectans RCxRC", "P.rostrata PZxPZ"),
n.parasite = c(n2$n.rows, n1$n.rows, n3$n.rows),
n.host = c(n2$n.cols, n1$n.cols, n3$n.cols),
average.inf = c(average.inf2, average.inf1, average.inf3),
temperature = c(n2$temperature, n1$temperature, n3$temperature),
p.value = c(n2$p.null3, n1$p.null3, n3$p.null3)
)
df$temperature <- round(df$temperature, 1)
df$average.inf <- round(df$average.inf, 2)
#write.table(df, file = "~/Dropbox (Personal)/Laure Guillou data/summary_nestedness.csv", sep = ",", row.names = F, col.names = T)
#### figure matrices:
#image(mat1.2[n1$pack.order.row, n1$pack.order.col], col = c("white", "black"))
#image(mat2.2[n2$pack.order.row, n2$pack.order.col[n2$pack.order.col > 0]], col = c("white", "black"))
#image(mat3.2[n3$pack.order.row[n3$pack.order.row > 0], n3$pack.order.col[n3$pack.order.col > 0]], col = c("white", "black"))
order.col1 <- n1$pack.order.col[n1$pack.order.col > 0]
order.col1 <- c(1, order.col1, which(colSums(mat1) == 0)) # I have to add the first col here, not sure why it is removed
order.col2 <- n2$pack.order.col[n2$pack.order.col > 0]
order.col2 <- c(setdiff(1:ncol(mat2), order.col2), order.col2)
order.col3 <- n3$pack.order.col[n3$pack.order.col > 0]
order.col3 <- c(setdiff(1:ncol(mat3), order.col3), order.col3)
order.row3 <- n3$pack.order.row[n3$pack.order.row > 0]
order.row3 <- c(setdiff(1:nrow(mat3), order.row3), order.row3)
m1 <- cbind(mat1.2[n1$pack.order.row, ], mat1[, which(colSums(mat1) == 0)]) # for m1 add the columns we erased
m2 <- mat2.2[n2$pack.order.row, order.col2]
m3 <- mat3.2[order.row3, order.col3]
# check dimensions are identical than original:
stopifnot(all(dim(m1) == dim(mat1)))
stopifnot(all(dim(m2) == dim(mat2)))
stopifnot(all(dim(m3) == dim(mat3)))
plot.mat <- function(m, label){
plot(NA,xlim = c(0,ncol(m)), ylim = c(0, nrow(m)), main = label, cex = 0.8,
xlab = "", ylab =  "", xaxt = 'n', yaxt = 'n')
for(ii in 1:nrow(m)){
for(jj in 1:ncol(m)){
if(!is.na(m[ii,jj]) & m[ii,jj]==1){ # parasite infects
rect(jj-1, ii-1, jj, ii, col = "black", lwd = 0, border = NA)
}
if(!is.na(m[ii,jj]) & m[ii,jj]==0){ # parasite cannot infect
rect(jj-1, ii-1, jj, ii, col = "white", lwd = 0, border = NA)
}
}
}
}
plot.mat(m1, "P. infectans in Rance")
plot.mat(m2, "P. infectans in Penzé")
plot.mat(m3, "P. rostrata in Penzé")
###  ------------------------------------------------------------------------------------------  ###
####                      CORRELATION BETWEEN SPECIES AND LOCATIONS                             ####
###  ------------------------------------------------------------------------------------------  ###
#### CORRELATION OF RESISTANCE TO INFECTANS AND RESISTANCE TO ROSTRATA FOR THE HOST ####
all.sum <- ddply(all[all$OrigineP=="PZ",], .(SoucheH, SpeciesP), summarise, OrigineH = unique(OrigineH),
vir = mean(Virulence, na.rm = T)) # use only the PZ parasites
all.sum.ros <- all.sum[which(all.sum$SpeciesP == "rostrata"),]
all.sum.inf <- all.sum[which(all.sum$SpeciesP == "infectans"),]
dim(all.sum.ros)
dim(all.sum.inf)
stopifnot(all(all.sum.ros$SoucheH == all.sum.inf$SoucheH))
stopifnot(all(all.sum.ros$OrigineH == all.sum.inf$OrigineH))
plot(all.sum.ros$vir, all.sum.inf$vir, xlim = c(0,1), ylim = c(0,1))
abline(0,1)
summary(lm(all.sum.ros$vir ~ all.sum.inf$vir))
plot(all.sum.ros$vir[all.sum.ros$OrigineH == "RC"],
all.sum.inf$vir[all.sum.ros$OrigineH == "RC"]
, xlim = c(0,1), ylim = c(0,1))
abline(0,1)
summary(lm(all.sum.ros$vir[all.sum.ros$OrigineH == "RC"]
~ all.sum.inf$vir[all.sum.inf$OrigineH == "RC"]))
# infectans versus rostrata with Penze only hosts
pdf("~/Dropbox (Personal)/Laure Guillou data/correlation.res.inf.ros.pdf", width = 6, height = 6)
layout(matrix(c(1, 2, 3, 4, 5, 3, 6, 6, 7), nrow = 3, ncol = 3, byrow = T), widths = c(1,1,2), heights = c(1,1,2))
par(mar=c(2, 1, 2, 1)) #  bottom, left, top and right margins
plot(1, type="n", axes=F, xlab="", ylab="", xlim = c(0,1), ylim = c(0,1))
text(0.1, 0.9, "A", cex = 4)
plot.mat(m3, expression(paste(italic("P. rostrata"), " in Penzé")))
# HERE ADJUST SIZE OF POINTS FOR MULTIPLICITY
df1 <- data.frame(V1 = 1-all.sum.ros$vir[all.sum.ros$OrigineH == "PZ"], V2 = 1-all.sum.inf$vir[all.sum.ros$OrigineH == "PZ"])
df1 <- ddply(df1, .(V1, V2), summarise, count = length(V2))
par(mar=c(4, 4, 2.1, 1))  # mar=c(5.1,4.1,4.1,2.1) bottom, left, top and right
plot(df1$V1, df1$V2, xlim = c(0,1), ylim = c(0,1),
xlab = expression(paste("Host resistance to ", italic("P. rostrata"))), las = 1,
ylab = expression(paste("Host resistance to ", italic("P. infectans"))), pch = 1, cex = df1$count)
abline(0,1, lty = "dashed")
mtext("B", side = 3, at = 0, cex = 2)
res.ros <- 1 - all.sum.ros$vir[all.sum.ros$OrigineH == "PZ"]
res.inf <- 1 - all.sum.inf$vir[all.sum.inf$OrigineH == "PZ"]
lm.corr1 <- lm(res.inf ~ res.ros)
beta <- lm.corr1$coefficients[2]
intercept <- lm.corr1$coefficients[1]
pvalue <- anova(lm.corr1)["Pr(>F)"][1,1]
abline(intercept, beta)
#text(0.6, 0.2, labels = paste("beta=", round(beta,2), "\np =", round(pvalue,2)), cex =1.3)
par(mar=c(1, 1, 2, 1))
plot.mat(m1, expression(paste(italic("P. infectans"), " in Rance")))
plot.mat(m2, expression(paste(italic("P. infectans"), " in Penzé")))
#### CORRELATION BETWEEN RESISTANCE TO INFECTANS OF PZ AND OF RC ####
# (WEAKLY CORRELATED, MORE SO FOR RC HOSTS)
# sort out mean infectivity of each host strain by parasite origin
all.sum2 <- ddply(all[all$SpeciesP=="infectans",], .(SoucheH, OrigineP), summarise, OrigineH = unique(OrigineH),
vir = mean(Virulence, na.rm = T))
all.sum2.RC <- all.sum2[which(all.sum2$OrigineP == "RC"),]
all.sum2.PZ <- all.sum2[which(all.sum2$OrigineP == "PZ"),]
stopifnot(all(all.sum2.RC$SoucheH == all.sum2.PZ$SoucheH)) # in these two tables the host strains are the same
# all host strains together:
# HERE ADJUST SIZE OF POINTS FOR MULTIPLICITY
par(mar=c(4, 4, 2, 1))
df1 <- data.frame(V1 = 1 - all.sum2.RC$vir[all.sum2.RC$OrigineH == "PZ"], V2 = 1 - all.sum2.PZ$vir[all.sum2.PZ$OrigineH == "PZ"])
df1 <- ddply(df1, .(V1, V2), summarise, count = length(V2))
plot(df1$V1, df1$V2, xlim = c(0,1), ylim = c(0,1), las = 1,
xlab = expression(paste("Host resistance to Rance ", italic("P. infectans"))),
ylab = expression(paste("Host resistance to Penzé ", italic("P. infectans"))), pch = 1, cex=df1$count
)
df2 <- data.frame(V1 = 1-all.sum2.RC$vir[all.sum2.RC$OrigineH == "RC"], V2 = 1-all.sum2.PZ$vir[all.sum2.PZ$OrigineH == "RC"])
df2 <- ddply(df2, .(V1, V2), summarise, count = length(V2))
points(df2$V1, df2$V2, pch = 16, cex = df2$count)
legend(0., 1, legend = c("Host from Penzé", "Host from Rance"), pch = c(1, 16))
abline(0,1, lty = "dashed")
mtext("C", side = 3, at = 0, cex = 2)
res.RC <- 1-all.sum2.RC$vir
res.PZ <- 1-all.sum2.PZ$vir
lm.corr2 <- lm(res.PZ ~ res.RC)
beta <- lm.corr2$coefficients[2]
intercept <- lm.corr2$coefficients[1]
pvalue <- anova(lm.corr2)["Pr(>F)"][1,1]
abline(intercept, beta)
summary(lm(all.sum2.RC$vir ~ all.sum2.PZ$vir))
summary(lm(all.sum2.RC$vir ~ all.sum2.PZ$vir))$r.squared
#### CORRELATION FOR INFECTANS OF INFECTIVITY TO PZ AND RESISTANCE TO RC HOSTS ####
all.sum3 <- ddply(all[all$SpeciesP=="infectans",], .(SoucheP, OrigineH), summarise, OrigineP = unique(OrigineP),
vir = mean(Virulence, na.rm = T))
all.sum3.RC <- all.sum3[which(all.sum3$OrigineH == "RC"),]
all.sum3.PZ <- all.sum3[which(all.sum3$OrigineH == "PZ"),]
dim(all.sum3.RC)
dim(all.sum3.PZ)
stopifnot(all(all.sum3.RC$SoucheP == all.sum3.PZ$SoucheP)) # check same strains exactly
# all strains together:
para.PZ <- which(all.sum3.RC$OrigineP == "PZ") # strains from PZ
para.RC <- which(all.sum3.RC$OrigineP == "RC")
par(mar=c(4, 4, 2, 1))
plot(all.sum3.RC$vir[para.PZ], all.sum3.PZ$vir[para.PZ], xlim = c(0,1), ylim = c(0,1), las = 1,
pch = 0, col = "black", xlab = "Parasite infectivity on Rance host", ylab = "Parasite infectivity on Penzé host")
points(all.sum3.RC$vir[para.RC], all.sum3.PZ$vir[para.RC], col = "black", pch = 15)
abline(0,1, lty = "dashed")
mtext("D", side = 3, at = 0, cex = 2)
summary(lm(all.sum3.RC$vir ~ all.sum3.PZ$vir)) # very high correlation
summary(lm(all.sum3.RC$vir ~ all.sum3.PZ$vir))$r.squared
lm.corr3 <- lm(all.sum3.RC$vir ~ all.sum3.PZ$vir)
beta <- lm.corr3$coefficients[2]
intercept <- lm.corr3$coefficients[1]
abline(intercept, beta)
#### CORRELATION FOR ROSTRATA OF INFECTIVITY TO PZ AND RESISTANCE TO RC HOSTS ####
all.sum4 <- ddply(all[all$SpeciesP=="rostrata",], .(SoucheP, OrigineH), summarise, OrigineP = unique(OrigineP),
vir = mean(Virulence, na.rm = T))
all.sum4.RC <- all.sum4[which(all.sum4$OrigineH == "RC"),]
all.sum4.PZ <- all.sum4[which(all.sum4$OrigineH == "PZ"),]
dim(all.sum4.RC)
dim(all.sum4.PZ)
stopifnot(all(all.sum4.RC$SoucheP == all.sum4.PZ$SoucheP)) # check same strains exactly
# all strains together: (all parasite come from PZ anyway)
points(all.sum4.PZ$vir, all.sum4.RC$vir, xlim = c(0,1), ylim = c(0,1),
pch = 3)
legend(0., 1, legend = c(expression(paste(italic("P. infectans"),  " from Penzé")), expression(paste(italic("P. infectans"),  " from Rance")), expression(paste(italic("P. rostrata")))), pch = c(0, 15, 3))
# abline(0,1)
dev.off() #### END OF THE GRAPH
#-----------------------------------------------------------------#
############ save relevant correlation information ################
#-----------------------------------------------------------------#
lm.corr1 # resistance to P. rostrata vs resistance to P. infectans for host PZ
lm.corr2 # resistance to Rance P.infectans vs. Penze P. infectans for all hosts
lm.corr3 # infectivity on Rance host vs. infectivity on Penzé host for all P. infectans parasites
lm.corr4 <- lm(all.sum4.RC$vir ~ all.sum4.PZ$vir) # infectivity on Rance host vs. infectivity on PZ host for rostrata parasites
list.lm <- list(lm.corr1, lm.corr2, lm.corr3, lm.corr4)
tab.summary <- matrix(NA, nrow = 5, ncol = 5)
tab.summary[,1] <- c("lm", "resistance to rostrata vs infectans", "resistance to RC infectans vs PZ infectans", "infectivity infectans on RC vs PZ", "infectivity rostrata on RC vs PZ")
tab.summary[,2] <- c("intercept", unlist(lapply(list.lm, function(this.lm) this.lm$coefficients[1])))
tab.summary[,3] <- c("regression coeff", unlist(lapply(list.lm, function(this.lm) this.lm$coefficients[2])))
tab.summary[,4] <- c("R2", unlist(lapply(list.lm, function(this.lm) summary(this.lm)$r.squared)))
tab.summary[,5] <- c("p-value", unlist(lapply(list.lm, function(this.lm) summary(this.lm)$coefficients[2,4])))
write.table(tab.summary, "~/Dropbox (Personal)/Laure Guillou data/summary.lm.inter.correlations.csv", sep = ",", row.names = F, col.names = F)
tmp <- ddply(all[which(all$SpeciesP == "infectans"),], .(SoucheP, OrigineH), summarise, mean.vir = mean(Virulence, na.rm = T), OrigineP = unique(OrigineP))
median(tmp[tmp$OrigineH == "PZ" & tmp$OrigineP == "PZ","mean.vir"])
tmp
lm0 <- lm(mean.vir ~ OrigineH + OrigineP, data = tmp)
Anova(lm0) # parasite infectans in Rance is crap
tmp2 <- ddply(all[which(all$OrigineP == "PZ" & all$OrigineH == "PZ"),], .(SoucheP), summarise, mean.vir = mean(Virulence, na.rm = T), SpeciesP = unique(SpeciesP))
mean(tmp2$mean.vir[tmp2$SpeciesP == "rostrata"], na.rm = T)
mean(tmp2$mean.vir[tmp2$SpeciesP == "infectans"], na.rm = T)
lm1 <- lm(mean.vir ~ SpeciesP, data = tmp2)
Anova(lm1) # no difference in rostrata versus infectans
tmp2
Anova(lm0) # parasite infectans in Rance is crap
confint(lm0) # 0.2347-0.5460
lm0$coefficients
-0.39036801--0.5460361
-0.39036801--0.2347000
confint(lm0)["OriginePRC",]
summary(lm0)["OriginePRC"]confint(lm0)["OriginePRC",]
summary(lm0)["OriginePRC"] - confint(lm0)["OriginePRC",]
summary(lm0)["OriginePRC"]
lm0$coefficients["OriginePRC"] - confint(lm0)["OriginePRC",]
tmp <- ddply(all[which(all$SpeciesP == "infectans"),], .(SoucheP, OrigineH), summarise, mean.vir = mean(Virulence, na.rm = T), OrigineP = unique(OrigineP))
tmp
tmp <- ddply(all[which(all$SpeciesP == "infectans"),], .(OrigineP, OrigineH), summarise, mean.vir = mean(Virulence, na.rm = T), OrigineP = unique(OrigineP))
lm0 <- lm(mean.vir ~ OrigineH + OrigineP, data = tmp)
Anova(lm0) # parasite infectans in Rance is crap
tmp
lm0$coefficients
summary(lm0)
confint(lm0) # 0.2347-0.5460
lm0$coefficients["OriginePRC"] - confint(lm0)["OriginePRC",] # ± 0.16
tmp
tmp <- ddply(all[which(all$SpeciesP == "infectans"),], .(OrigineP, OrigineH), summarise, mean.vir = mean(Virulence, na.rm = T))
lm0 <- lm(mean.vir ~ OrigineH + OrigineP, data = tmp)
Anova(lm0) # parasite infectans in Rance is crap
lm0$coefficients
summary(lm0)
confint(lm0) # 0.2347-0.5460
lm0$coefficients["OriginePRC"] - confint(lm0)["OriginePRC",] # ± 0.16
lm0 <- lm(mean.vir ~ OrigineP, data = tmp)
Anova(lm0) # parasite infectans in Rance is crap
lm0$coefficients
summary(lm0)
confint(lm0) # 0.2347-0.5460
lm0$coefficients["OriginePRC"] - confint(lm0)["OriginePRC",] # ± 0.16
tmp <- ddply(all[which(all$SpeciesP == "infectans"),], .(OrigineP, OrigineH), summarise, mean.vir = mean(Virulence, na.rm = T))
lm0 <- lm(mean.vir ~ OrigineH + OrigineP, data = tmp)
Anova(lm0) # parasite infectans in Rance is crap
lm0$coefficients
summary(lm0)
confint(lm0) # 0.2347-0.5460
lm0$coefficients["OriginePRC"] - confint(lm0)["OriginePRC",] # ± 0.16
lm0 <- lm(mean.vir ~ OrigineH + OrigineP, data = tmp)
tmp <- ddply(all[which(all$SpeciesP == "infectans"),], .(OrigineP, OrigineH), summarise, mean.vir = mean(Virulence, na.rm = T))
lm0 <- lm(mean.vir ~ OrigineH + OrigineP, data = tmp)
Anova(lm0) # parasite infectans in Rance is crap
tmp2 <- ddply(all[which(all$OrigineP == "PZ" & all$OrigineH == "PZ"),], .(SoucheP), summarise, mean.vir = mean(Virulence, na.rm = T), SpeciesP = unique(SpeciesP))
mean(tmp2$mean.vir[tmp2$SpeciesP == "rostrata"], na.rm = T)
mean(tmp2$mean.vir[tmp2$SpeciesP == "infectans"], na.rm = T)
lm1 <- lm(mean.vir ~ SpeciesP, data = tmp2) # infectivity by parasite species
Anova(lm1) # no difference in rostrata versus infectans
lm1$coefficients
summary(lm1)
confint(lm1)
tmp
rm(list = ls())
library(plyr)
library(lme4)
library(car)
# François Blanquart 24.08.2016
# example code showing the linear model to explain infectivity and resistance as a function of time
setwd("~/Dropbox/Laure Guillou data/draft/submission2/example_code/")
#### LOAD AND CLEAN THE DATA ####
all <- read.csv("all.csv", header = T, sep = ",")
for(colname in c("SoucheH", "OrigineH", "SpeciesP", "OrigineP", "DateP", "SoucheP", "ts", "af.DateP", "af.DateH")){
all[, colname] <- as.factor(all[, colname])
}
all$ts <- relevel(all$ts, ref="-2") # -2 as first level
all$ClusterH <- as.character(all$ClusterH)
all$ClusterH[is.na(all$ClusterH)] <- "NA"
table(is.na(all$Virulence)) # 3711 datapoints
#### FOR EACH COMBINATION OF PARAMETERS RUN THE LINEAR MODEL ####
combi <- matrix(NA, nrow = 3, ncol = 3) # combinations of parasite species, parasite origin, and host origin
combi[1,] <- c("infectans", "PZ", "PZ")
combi[2,] <- c("infectans", "RC", "RC")
combi[3,] <- c("rostrata", "PZ", "PZ")
get.effects <- function(this.glm){
c(
unlist(fitted(this.glm)),
unlist(fixef(this.glm)),
unlist(VarCorr(this.glm))
)
}
ii <- 1
thiscombi <- combi[ii,]
print(thiscombi)
# subset of the data we use:
subset <- which(all$SpeciesP == thiscombi[1] & all$OrigineP == thiscombi[2] & all$OrigineH == thiscombi[3] & !is.na(all$Virulence))
######### THE MAIN GENERALISED LINEAR MIXED MODEL   #########
glm1_4 <- glmer(Virulence ~ af.DateH + af.DateP + as.factor(ClusterH) + as.factor(ts) + (1|SoucheH)+ (1|SoucheP), data = all[subset,], family = binomial)
# there is a warning that convergence failed, but one can check that the gradient is small; changing the optimiser or increasing the number of iterations does not change the ML parameters
# relgrad <- with(glm1_4@optinfo$derivs,solve(Hessian,gradient))
# print(max(abs(relgrad)))
# ML parameters
print("ML parameters")
var.estimates <- unlist(VarCorr(glm1_4))
fixed.estimates <- fixef(glm1_4)
print(fixed.estimates)
print(var.estimates)
######### CONFIDENCE INTERVALS FOR FIXED EFFECTS AND RANDOM EFFECTS BASED ON BOOTSTRAPPING   #########
load(file = paste0("glm_boot_", paste(thiscombi, collapse = ""), ".2.RData")) # this contains the results of glm on bootstrapped data for confidence intervals
# tmp1$t contains is a matrix with 200 lines (the 200 bootstraps)
# and columns corresponding to fitted infectivity values, then the fixed effects, then the variance of the two random effects
# this is the result of running bootMer(glm1_4, get.effects, 200, verbose = T, use.u = T, type = "parametric") -> tmp1
# with get.effects defined above
n.fit <- length(fitted(glm1_4))
n.fixef <- length(fixef(glm1_4))
n.var <- length(VarCorr(glm1_4))
stopifnot(ncol(tmp1$t) == n.fit+n.fixef+n.var) # check the table of bootstrapped parameters is indeed length of fitted values, fixef effects, and variances
stopifnot(n.fit == length(subset))
# confidence intervals for fixed effects and variance effects
bs.var <- tmp1$t[, (n.fit+n.fixef+1):(n.fit+n.fixef+n.var)] # bootstrap intervals on variance
bs.fixef <- tmp1$t[ , (n.fit + 1):(n.fit + n.fixef)] # bootstrap intervals on fixed effects
var.int <- apply(bs.var, 2, quantile, probs = c(0.025, 0.975))
fixed.int <- apply(bs.fixef, 2, quantile, probs = c(0.025, 0.975))
print("confidence intervals")
print(fixed.int)
print(var.int)
######### P VALUES CALCULATIONS   #########
# two glm to test for significance of random effects
glm1_4_2 <- glmer(Virulence ~ af.DateH + af.DateP + as.factor(ClusterH) + as.factor(ts) + (1|SoucheP), data = all[subset,], family = binomial)
glm1_4_3 <- glmer(Virulence ~ af.DateH + af.DateP + as.factor(ClusterH) + as.factor(ts) + (1|SoucheH), data = all[subset,], family = binomial)
anova(glm1_4, glm1_4_2, test = "Chisq")
anova(glm1_4, glm1_4_3, test = "Chisq")
x <- runif(100, 0, 1)
y <- rnorm(100, 0, 1)
lm(y ~ x)
Anova(lm(y ~ x))
anova(lm(y ~ x))
summary(lm(y ~ x))
summary(lm(y ~ x))
summary(lm(x ~ y))
5*115
