This code removes taxonomic uncertainty terms from species in the SCAR Antarctic Terrestrial Biodiversity Database. The output .csv file is then used in subsequent analyses.
Load packages and set working directory. Read in biodiversity database.
# Attach the packages this script depends on. library() is used rather than
# require() because library() stops with an error when a package is missing,
# whereas require() only returns FALSE and lets the script fail later.
library(tidyverse)
packages <- c("bdc", "here")
walk(packages, library, character.only = TRUE)

# Print the project root directory found by here.
here::here()

# Read in the SCAR Antarctic Terrestrial Biodiversity Database.
Ant_biodf <- read.csv(
  here("Data/SCAR_Ant_Terr_Bio_DataBase_FINAL_25-Aug-2023.csv")
)
We used the package bdc (Ribeiro et al. 2022) to identify and remove
taxonomic uncertainty terms (such as “sp.” or “cf.”).
We checked for uncertainty terms in the scientific name and at the
species, genus, and family levels. Each time, the bdc_clean_names
function writes its output to the same file,
“Output/Check/02_parsed_names.csv”, which is then reloaded before the next call.
*Note: the code below is commented out for knitting to HTML.
# # Clean scientific name
# bdc_clean_names(Ant_biodf$scientificName, save_outputs = TRUE)
#
# # Load the cleaned data
# clean_doc <- read.csv(here("Output/Check/02_parsed_names.csv"))
#
# # Add cleaned scientific name to database
# Ant_biodf_clean <- Ant_biodf %>% mutate(scientificNameClean = clean_doc[,11])
#
# # Clean species name
# bdc_clean_names(Ant_biodf_clean$species, save_outputs = TRUE)
#
# # Load the cleaned data
# clean_doc <- read.csv(here("Output/Check/02_parsed_names.csv"))
#
# # Add cleaned species name to database *note that it capitalises species names
# Ant_biodf_clean <- Ant_biodf_clean %>% mutate(speciesClean = clean_doc[,11])
#
# # Clean genus name
# bdc_clean_names(Ant_biodf_clean$genus, save_outputs = TRUE)
#
# # Load the cleaned data
# clean_doc <- read.csv(here("Output/Check/02_parsed_names.csv"))
#
# # Add cleaned genus name to database
# Ant_biodf_clean <- Ant_biodf_clean %>% mutate(genusClean = clean_doc[,11])
#
# # Clean family name
# bdc_clean_names(Ant_biodf_clean$family, save_outputs = TRUE)
#
# # Load the cleaned data
# clean_doc <- read.csv(here("Output/Check/02_parsed_names.csv"))
#
# # Add cleaned family name to database
# Ant_biodf_clean <- Ant_biodf_clean %>% mutate(familyClean = clean_doc[,11])
#
# # Run the scientific name clean one final time so that its saved output is ready for the uncertainty analysis
# bdc_clean_names(Ant_biodf$scientificName, save_outputs = TRUE)
# Save the database
# write.csv(Ant_biodf_clean, here("Data/Ant_Terr_Bio_Data_Uncertainty_Terms_Removed_August_2023.csv"))
Ribeiro, B.R., Velazco, S.J.E., Guidoni-Martins, K., Tessarolo, G., Jardim, L., Bachman, S.P., Loyola, R., 2022. bdc: A toolkit for standardizing, integrating and cleaning biodiversity data. Methods in Ecology and Evolution 13, 1421–1428. https://doi.org/10.1111/2041-210X.13868
# Report the R version, platform, locale, and package versions used for this run.
sessionInfo()
## R version 4.2.3 (2023-03-15 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_Australia.utf8 LC_CTYPE=English_Australia.utf8
## [3] LC_MONETARY=English_Australia.utf8 LC_NUMERIC=C
## [5] LC_TIME=English_Australia.utf8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] here_1.0.1 bdc_1.1.4 lubridate_1.9.2 forcats_1.0.0
## [5] stringr_1.5.0 dplyr_1.1.1 purrr_1.0.1 readr_2.1.4
## [9] tidyr_1.3.0 tibble_3.2.1 ggplot2_3.4.2 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] fs_1.6.1 sf_1.0-12 oai_0.4.0
## [4] httr_1.4.5 rprojroot_2.0.3 rgbif_3.7.7
## [7] tools_4.2.3 bslib_0.4.2 utf8_1.2.3
## [10] rgdal_1.6-5 R6_2.5.1 DT_0.27
## [13] KernSmooth_2.23-20 rgeos_0.6-2 DBI_1.1.3
## [16] lazyeval_0.2.2 colorspace_2.1-0 raster_3.6-20
## [19] withr_2.5.0 sp_1.6-0 tidyselect_1.2.0
## [22] curl_5.0.0 compiler_4.2.3 cli_3.6.1
## [25] xml2_1.3.3 stringfish_0.15.7 sass_0.4.5
## [28] scales_1.2.1 classInt_0.4-9 proxy_0.4-27
## [31] askpass_1.1 digest_0.6.31 rmarkdown_2.21
## [34] contentid_0.0.16 pkgconfig_2.0.3 htmltools_0.5.5
## [37] dbplyr_2.3.2 fastmap_1.1.1 htmlwidgets_1.6.2
## [40] rlang_1.1.0 rstudioapi_0.14 jquerylib_0.1.4
## [43] generics_0.1.3 RApiSerialize_0.1.2 jsonlite_1.8.4
## [46] magrittr_2.0.3 geosphere_1.5-18 Rcpp_1.0.10
## [49] munsell_0.5.0 fansi_1.0.4 CoordinateCleaner_2.0-20
## [52] rgnparser_0.2.6 lifecycle_1.0.3 terra_1.7-18
## [55] stringi_1.7.12 whisker_0.4.1 yaml_2.3.7
## [58] plyr_1.8.8 grid_4.2.3 parallel_4.2.3
## [61] lattice_0.20-45 hms_1.1.3 sys_3.4.1
## [64] knitr_1.42 pillar_1.9.0 codetools_0.2-19
## [67] glue_1.6.2 evaluate_0.20 data.table_1.14.8
## [70] RcppParallel_5.1.7 vctrs_0.6.1 tzdb_0.3.0
## [73] foreach_1.5.2 gtable_0.3.3 openssl_2.0.6
## [76] qs_0.25.5 cachem_1.0.7 xfun_0.38
## [79] e1071_1.7-13 rnaturalearth_0.3.2 taxadb_0.2.1
## [82] class_7.3-21 iterators_1.0.14 memoise_2.0.1
## [85] units_0.8-1 timechange_0.2.0