library(vegan)
library(stringr)
library(VpThemAll)
library(ggplot2)
library(dplyr)
data(mite)
data(mite.env)
abu <- mite
meta <- mite.env
#Hellinger standardization
abu.std <- decostand(abu, "hellinger")
# Bray-Curtis dissimilarity
dis <- vegdist(abu.std)
Let’s have a look on how this data look like
#abundance
head(abu)
## Brachy PHTH HPAV RARD SSTR Protopl MEGR MPRO TVIE HMIN HMIN2 NPRA TVEL ONOV
## 1 17 5 5 3 2 1 4 2 2 1 4 1 17 4
## 2 2 7 16 0 6 0 4 2 0 0 1 3 21 27
## 3 4 3 1 1 2 0 3 0 0 0 6 3 20 17
## 4 23 7 10 2 2 0 4 0 1 2 10 0 18 47
## 5 5 8 13 9 0 13 0 0 0 3 14 3 32 43
## 6 19 7 5 9 3 2 3 0 0 20 16 2 13 38
## SUCT LCIL Oribatl1 Ceratoz1 PWIL Galumna1 Stgncrs2 HRUF Trhypch1 PPEL NCOR
## 1 9 50 3 1 1 8 0 0 0 0 0
## 2 12 138 6 0 1 3 9 1 1 1 2
## 3 10 89 3 0 2 1 8 0 3 0 2
## 4 17 108 10 1 0 1 2 1 2 1 3
## 5 27 5 1 0 5 2 1 0 1 0 0
## 6 39 3 5 0 1 1 8 0 4 0 1
## SLAT FSET Lepidzts Eupelops Miniglmn LRUG PLAG2 Ceratoz3 Oppiminu Trimalc2
## 1 0 0 0 0 0 0 0 0 0 0
## 2 2 2 1 0 0 0 0 0 0 0
## 3 0 8 0 0 0 0 0 0 0 0
## 4 2 12 0 0 0 0 0 0 0 0
## 5 0 12 2 0 0 0 0 0 0 0
## 6 0 10 0 0 0 0 0 0 0 0
#metadata
head(meta)
## SubsDens WatrCont Substrate Shrub Topo
## 1 39.18 350.15 Sphagn1 Few Hummock
## 2 54.99 434.81 Litter Few Hummock
## 3 46.07 371.72 Interface Few Hummock
## 4 48.19 360.50 Sphagn1 Few Hummock
## 5 23.55 204.13 Sphagn1 Few Hummock
## 6 57.32 311.55 Sphagn1 Few Hummock
First let’s select the minimal model based on ordistep. By selecting minimal model I mean select the variables of metadata that will be kept for testing. If needed, co-linear variables should be removed previous this step.
selected.model <- select.minimal.model(dis, meta)
##
## Start: dis ~ 1
##
## Df AIC F Pr(>F)
## + WatrCont 1 135.96 32.686 0.005 **
## + Shrub 2 143.28 12.471 0.005 **
## + Topo 1 148.80 15.814 0.005 **
## + Substrate 6 160.50 2.132 0.005 **
## + SubsDens 1 160.53 2.877 0.045 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: dis ~ WatrCont
##
## Df AIC F Pr(>F)
## - WatrCont 1 161.44 32.686 0.005 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Df AIC F Pr(>F)
## + SubsDens 1 130.63 7.3951 0.005 **
## + Shrub 2 130.76 4.6342 0.005 **
## + Topo 1 131.12 6.8743 0.005 **
## + Substrate 6 136.47 1.8428 0.005 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: dis ~ WatrCont + SubsDens
##
## Df AIC F Pr(>F)
## - SubsDens 1 135.96 7.3951 0.005 **
## - WatrCont 1 160.53 38.6836 0.005 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Df AIC F Pr(>F)
## + Topo 1 123.41 9.2884 0.005 **
## + Shrub 2 127.63 3.4175 0.005 **
## + Substrate 6 130.66 1.8961 0.005 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: dis ~ WatrCont + SubsDens + Topo
##
## Df AIC F Pr(>F)
## - Topo 1 130.63 9.2884 0.005 **
## - SubsDens 1 131.12 9.8192 0.005 **
## - WatrCont 1 146.15 27.9786 0.005 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Df AIC F Pr(>F)
## + Shrub 2 121.59 2.7780 0.005 **
## + Substrate 6 124.24 1.7313 0.010 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: dis ~ WatrCont + SubsDens + Topo + Shrub
##
## Df AIC F Pr(>F)
## - Shrub 2 123.41 2.7780 0.015 *
## - Topo 1 127.63 7.7953 0.005 **
## - SubsDens 1 127.70 7.8672 0.005 **
## - WatrCont 1 135.65 16.5050 0.005 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Df AIC F Pr(>F)
## + Substrate 6 121.59 1.8073 0.005 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: dis ~ WatrCont + SubsDens + Topo + Shrub + Substrate
##
## Df AIC F Pr(>F)
## - Substrate 6 121.59 1.8073 0.020 *
## - Shrub 2 124.24 2.8891 0.020 *
## - Topo 1 127.24 6.7015 0.005 **
## - SubsDens 1 127.42 6.8696 0.005 **
## - WatrCont 1 135.70 15.0071 0.005 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
These were the variables selected:
selected.model$sel.m.variables
## [1] "WatrCont" "SubsDens" "Topo" "Shrub" "Substrate"
Before performing the tests, let’s first prepare and list what we want to be tested
var.vec <- selected.model$sel.m.variables
# Make index of full formula
index.to.test <- make.index.full(var.vec)
The structure of index.to.test is below. As it can be seen, it contains the variable to be tested, in this case “full” which states for full formula, the element to be tested “full”, and the resulting var.formula. var.formula is the part of the formula that includes the independent variables. For instance, in the formula Y ~ A + B, the var.formula is A + B.
head(index.to.test)
## var element var.formula
## 1 full full WatrCont + SubsDens + Topo + Shrub + Substrate
Below are some of the tests that we want to perform
head(index.to.test)
## var element var.formula
## 1 full full WatrCont + SubsDens + Topo + Shrub + Substrate
## 2 WatrCont unique WatrCont + Condition(SubsDens + Topo + Shrub + Substrate)
## 3 WatrCont shared WatrCont
## 4 SubsDens unique SubsDens + Condition(WatrCont + Topo + Shrub + Substrate)
## 5 SubsDens shared SubsDens
## 6 Topo unique Topo + Condition(WatrCont + SubsDens + Shrub + Substrate)
Now that we have listed the tests that we want to perform, let’s Varpart them all!!!
I want to get only the p.values and adj.r.squared. So, I will create columns for colecting them in the data.frame index.to.test.
index.to.test$p.value <- NULL
index.to.test$adj.r.squared <- NULL
Now, with a for loop, I will perform tests row by row, and add the results to the index.to.test table.
#
for (i in 1:nrow(index.to.test)){
res <- test.varpart.wrap(dis = dis,
meta = meta,
var.formula = index.to.test$var.formula[i],
element = index.to.test$element[i],
perm.n = 99) # Permutation number should be used as defaul, 999, but to speed up the calculation I will use 99.
# add to index.to.test
index.to.test$p.value[i] <- res$p.value
index.to.test$adj.r.squared[i] <- res$adj.r.squared
}
Please consider if you need/want to correct the p.values for multiple testing. If this is the case:
index.to.test$adj.p.value <- p.adjust(index.to.test$p.value, "fdr")
Let’s make sure that all variables tested have a significant effect on the community.
For this example, I will use a cutoff on adj.pvalue of 0.02. Let’s keep NA, which is the result of the testing the full formula.
index.to.test.f <- filter(index.to.test, adj.p.value < 0.02 |
is.na(adj.p.value) == T)
ggplot(index.to.test.f, aes(x = element,
y = adj.r.squared,
fill = var)) +
geom_bar(stat = "identity") +
theme_light()
There you are!
Full: Reprensents how much the full formula can explain the community variation. Full formula stands for all variables selected.
Shared: This is how much each variable explain the community variation, without being corrected by other variables.
Unique: This is how much each variable explain the community variation, having corrected for other variables in the test set. This is generally what you may be interested in getting out of this procedure.
Differences between the sum of “unique” and the full model is due to interations, colinearity, and variations explanined by more than one variable.
sessionInfo()
## R version 4.0.5 (2021-03-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=de_DE.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=de_DE.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=de_DE.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=de_DE.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] dplyr_1.0.4 ggplot2_3.3.3 VpThemAll_0.0.0.9000
## [4] stringr_1.4.0 vegan_2.5-7 lattice_0.20-41
## [7] permute_0.9-5 lemon_0.4.5
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.6 highr_0.8 pillar_1.4.7 compiler_4.0.5
## [5] plyr_1.8.6 tools_4.0.5 digest_0.6.27 nlme_3.1-152
## [9] evaluate_0.14 lifecycle_0.2.0 tibble_3.0.6 gtable_0.3.0
## [13] mgcv_1.8-33 pkgconfig_2.0.3 rlang_0.4.10 Matrix_1.3-2
## [17] DBI_1.1.1 parallel_4.0.5 yaml_2.2.1 xfun_0.20
## [21] gridExtra_2.3 withr_2.4.1 cluster_2.1.1 knitr_1.31
## [25] generics_0.1.0 vctrs_0.3.6 grid_4.0.5 tidyselect_1.1.0
## [29] glue_1.4.2 R6_2.5.0 rmarkdown_2.6 farver_2.0.3
## [33] purrr_0.3.4 magrittr_2.0.1 splines_4.0.5 MASS_7.3-53.1
## [37] scales_1.1.1 ellipsis_0.3.1 htmltools_0.5.1.1 assertthat_0.2.1
## [41] colorspace_2.0-0 labeling_0.4.2 stringi_1.5.3 munsell_0.5.0
## [45] crayon_1.4.1