Import packages

library(vegan)
library(stringr)
library(VpThemAll)
library(ggplot2)
library(dplyr)

Load data

data(mite)
data(mite.env)

abu <- mite
meta <- mite.env
#Hellinger standardization
abu.std <- decostand(abu, "hellinger")
# Bray-Curtis dissimilarity
dis <- vegdist(abu.std) 

Let’s have a look on how this data look like

#abundance
head(abu)
##   Brachy PHTH HPAV RARD SSTR Protopl MEGR MPRO TVIE HMIN HMIN2 NPRA TVEL ONOV
## 1     17    5    5    3    2       1    4    2    2    1     4    1   17    4
## 2      2    7   16    0    6       0    4    2    0    0     1    3   21   27
## 3      4    3    1    1    2       0    3    0    0    0     6    3   20   17
## 4     23    7   10    2    2       0    4    0    1    2    10    0   18   47
## 5      5    8   13    9    0      13    0    0    0    3    14    3   32   43
## 6     19    7    5    9    3       2    3    0    0   20    16    2   13   38
##   SUCT LCIL Oribatl1 Ceratoz1 PWIL Galumna1 Stgncrs2 HRUF Trhypch1 PPEL NCOR
## 1    9   50        3        1    1        8        0    0        0    0    0
## 2   12  138        6        0    1        3        9    1        1    1    2
## 3   10   89        3        0    2        1        8    0        3    0    2
## 4   17  108       10        1    0        1        2    1        2    1    3
## 5   27    5        1        0    5        2        1    0        1    0    0
## 6   39    3        5        0    1        1        8    0        4    0    1
##   SLAT FSET Lepidzts Eupelops Miniglmn LRUG PLAG2 Ceratoz3 Oppiminu Trimalc2
## 1    0    0        0        0        0    0     0        0        0        0
## 2    2    2        1        0        0    0     0        0        0        0
## 3    0    8        0        0        0    0     0        0        0        0
## 4    2   12        0        0        0    0     0        0        0        0
## 5    0   12        2        0        0    0     0        0        0        0
## 6    0   10        0        0        0    0     0        0        0        0
#metadata
head(meta)
##   SubsDens WatrCont Substrate Shrub    Topo
## 1    39.18   350.15   Sphagn1   Few Hummock
## 2    54.99   434.81    Litter   Few Hummock
## 3    46.07   371.72 Interface   Few Hummock
## 4    48.19   360.50   Sphagn1   Few Hummock
## 5    23.55   204.13   Sphagn1   Few Hummock
## 6    57.32   311.55   Sphagn1   Few Hummock

Select minimal model

First let’s select the minimal model based on ordistep. By selecting minimal model I mean select the variables of metadata that will be kept for testing. If needed, co-linear variables should be removed previous this step.

selected.model <- select.minimal.model(dis, meta)
## 
## Start: dis ~ 1 
## 
##             Df    AIC      F Pr(>F)   
## + WatrCont   1 135.96 32.686  0.005 **
## + Shrub      2 143.28 12.471  0.005 **
## + Topo       1 148.80 15.814  0.005 **
## + Substrate  6 160.50  2.132  0.005 **
## + SubsDens   1 160.53  2.877  0.045 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Step: dis ~ WatrCont 
## 
##            Df    AIC      F Pr(>F)   
## - WatrCont  1 161.44 32.686  0.005 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##             Df    AIC      F Pr(>F)   
## + SubsDens   1 130.63 7.3951  0.005 **
## + Shrub      2 130.76 4.6342  0.005 **
## + Topo       1 131.12 6.8743  0.005 **
## + Substrate  6 136.47 1.8428  0.005 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Step: dis ~ WatrCont + SubsDens 
## 
##            Df    AIC       F Pr(>F)   
## - SubsDens  1 135.96  7.3951  0.005 **
## - WatrCont  1 160.53 38.6836  0.005 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##             Df    AIC      F Pr(>F)   
## + Topo       1 123.41 9.2884  0.005 **
## + Shrub      2 127.63 3.4175  0.005 **
## + Substrate  6 130.66 1.8961  0.005 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Step: dis ~ WatrCont + SubsDens + Topo 
## 
##            Df    AIC       F Pr(>F)   
## - Topo      1 130.63  9.2884  0.005 **
## - SubsDens  1 131.12  9.8192  0.005 **
## - WatrCont  1 146.15 27.9786  0.005 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##             Df    AIC      F Pr(>F)   
## + Shrub      2 121.59 2.7780  0.005 **
## + Substrate  6 124.24 1.7313  0.010 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Step: dis ~ WatrCont + SubsDens + Topo + Shrub 
## 
##            Df    AIC       F Pr(>F)   
## - Shrub     2 123.41  2.7780  0.015 * 
## - Topo      1 127.63  7.7953  0.005 **
## - SubsDens  1 127.70  7.8672  0.005 **
## - WatrCont  1 135.65 16.5050  0.005 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##             Df    AIC      F Pr(>F)   
## + Substrate  6 121.59 1.8073  0.005 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Step: dis ~ WatrCont + SubsDens + Topo + Shrub + Substrate 
## 
##             Df    AIC       F Pr(>F)   
## - Substrate  6 121.59  1.8073  0.020 * 
## - Shrub      2 124.24  2.8891  0.020 * 
## - Topo       1 127.24  6.7015  0.005 **
## - SubsDens   1 127.42  6.8696  0.005 **
## - WatrCont   1 135.70 15.0071  0.005 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

These were the variables selected:

selected.model$sel.m.variables
## [1] "WatrCont"  "SubsDens"  "Topo"      "Shrub"     "Substrate"

Prepare for testing

Before performing the tests, let’s first prepare and list what we want to be tested

Make formula with all the selected variables

var.vec <- selected.model$sel.m.variables
# Make index of full formula
index.to.test <- make.index.full(var.vec)

The structure of index.to.test is below. As it can be seen, it contains the variable to be tested, in this case “full” which states for full formula, the element to be tested “full”, and the resulting var.formula. var.formula is the part of the formula that includes the independent variables. For instance, in the formula Y ~ A + B, the var.formula is A + B.

head(index.to.test)
##    var element                                    var.formula
## 1 full    full WatrCont + SubsDens + Topo + Shrub + Substrate

Make formulas with shared and unique portion of all variables

# Make index for deconfounding all variables in the model

for (var in var.vec){
  index.to.test <- rbind(index.to.test,
                            make.index.varpart(var, var.vec))
}

Perform variation partition tests

Below are some of the tests that we want to perform

head(index.to.test)
##        var element                                               var.formula
## 1     full    full            WatrCont + SubsDens + Topo + Shrub + Substrate
## 2 WatrCont  unique WatrCont + Condition(SubsDens + Topo + Shrub + Substrate)
## 3 WatrCont  shared                                                  WatrCont
## 4 SubsDens  unique SubsDens + Condition(WatrCont + Topo + Shrub + Substrate)
## 5 SubsDens  shared                                                  SubsDens
## 6     Topo  unique Topo + Condition(WatrCont + SubsDens + Shrub + Substrate)

Now that we have listed the tests that we want to perform, let’s Varpart them all!!!

I want to get only the p.values and adj.r.squared. So, I will create columns for colecting them in the data.frame index.to.test.

index.to.test$p.value <- NULL
index.to.test$adj.r.squared <- NULL

Now, with a for loop, I will perform tests row by row, and add the results to the index.to.test table.

#


for (i in 1:nrow(index.to.test)){
  res <- test.varpart.wrap(dis = dis,
                           meta = meta,
                           var.formula = index.to.test$var.formula[i],
                           element = index.to.test$element[i],
                           perm.n = 99) # Permutation number should be used as defaul, 999, but to speed up the calculation I will use 99.
  
  # add to index.to.test
  index.to.test$p.value[i] <- res$p.value
  index.to.test$adj.r.squared[i] <- res$adj.r.squared
}

Please consider if you need/want to correct the p.values for multiple testing. If this is the case:

index.to.test$adj.p.value <- p.adjust(index.to.test$p.value, "fdr")

Inspecting results

Filter the results

Let’s make sure that all variables tested have a significant effect on the community.

For this example, I will use a cutoff on adj.pvalue of 0.02. Let’s keep NA, which is the result of the testing the full formula.

index.to.test.f <-  filter(index.to.test, adj.p.value < 0.02 |
                           is.na(adj.p.value) == T)

Plot the results

ggplot(index.to.test.f, aes(x = element,
                            y = adj.r.squared,
                            fill = var)) +
  geom_bar(stat = "identity") +
  theme_light()

There you are!

Full: Reprensents how much the full formula can explain the community variation. Full formula stands for all variables selected.

Shared: This is how much each variable explain the community variation, without being corrected by other variables.

Unique: This is how much each variable explain the community variation, having corrected for other variables in the test set. This is generally what you may be interested in getting out of this procedure.

Differences between the sum of “unique” and the full model is due to interations, colinearity, and variations explanined by more than one variable.

References

sessionInfo()
## R version 4.0.5 (2021-03-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.5 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=de_DE.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=de_DE.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=de_DE.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=de_DE.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] dplyr_1.0.4          ggplot2_3.3.3        VpThemAll_0.0.0.9000
## [4] stringr_1.4.0        vegan_2.5-7          lattice_0.20-41     
## [7] permute_0.9-5        lemon_0.4.5         
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.6        highr_0.8         pillar_1.4.7      compiler_4.0.5   
##  [5] plyr_1.8.6        tools_4.0.5       digest_0.6.27     nlme_3.1-152     
##  [9] evaluate_0.14     lifecycle_0.2.0   tibble_3.0.6      gtable_0.3.0     
## [13] mgcv_1.8-33       pkgconfig_2.0.3   rlang_0.4.10      Matrix_1.3-2     
## [17] DBI_1.1.1         parallel_4.0.5    yaml_2.2.1        xfun_0.20        
## [21] gridExtra_2.3     withr_2.4.1       cluster_2.1.1     knitr_1.31       
## [25] generics_0.1.0    vctrs_0.3.6       grid_4.0.5        tidyselect_1.1.0 
## [29] glue_1.4.2        R6_2.5.0          rmarkdown_2.6     farver_2.0.3     
## [33] purrr_0.3.4       magrittr_2.0.1    splines_4.0.5     MASS_7.3-53.1    
## [37] scales_1.1.1      ellipsis_0.3.1    htmltools_0.5.1.1 assertthat_0.2.1 
## [41] colorspace_2.0-0  labeling_0.4.2    stringi_1.5.3     munsell_0.5.0    
## [45] crayon_1.4.1