This code removes taxonomic uncertainty terms from species names in the SCAR Antarctic Terrestrial Biodiversity Database. The output .csv file is then used in subsequent analyses.

Initial setup

Load packages and confirm the project root that here() uses to resolve file paths (the working directory itself is not changed). Read in the biodiversity database.

# Attach the tidyverse first: walk() below comes from purrr, which it loads.
library(tidyverse)

# Remaining packages needed by this script.
packages <- c("bdc", "here")

# Attach with library() rather than require(): library() stops with an error
# when a package is not installed, whereas require() only returns FALSE and
# lets the script fail later with a less obvious message. TRUE is spelled out
# because T is an ordinary variable that can be reassigned.
walk(packages, library, character.only = TRUE)

# Print the project root that here() resolves relative paths against.
here::here()

# Read in the biodiversity database from the project's Data folder.
Ant_biodf <- read.csv(here("Data/SCAR_Ant_Terr_Bio_DataBase_FINAL_25-Aug-2023.csv"))

Removing taxonomic uncertainty terms

We used the package bdc (Ribeiro et al. 2022) to identify and remove taxonomic uncertainty terms such as “sp.” or “cf.”. We checked for uncertainty terms in the scientific name and at the species, genus, and family level. Each time, the bdc_clean_names function writes its output to the same file, “Output/Check/02_parsed_names.csv”, overwriting the previous result; the file is therefore reloaded immediately after each call.

*Note: the code below is commented out for knitting to HTML.

# # Overview: the same three steps are repeated for scientificName, species,
# # genus and family: (1) run bdc_clean_names() with save_outputs = TRUE,
# # (2) reload "Output/Check/02_parsed_names.csv", (3) append column 11 of that
# # file to the database as a new *Clean column.
# # NOTE(review): column 11 is presumably the cleaned-name column of the parsed
# # file; the index is positional, so confirm it against the bdc version in use.
# # NOTE(review): the cleaned column is attached by position, which assumes the
# # parsed file has one row per database row, in the same order - TODO confirm.
# 
# # Clean scientific name
# bdc_clean_names(Ant_biodf$scientificName, save_outputs = TRUE)
# 
# # Load the cleaned data
# clean_doc <- read.csv(here("Output/Check/02_parsed_names.csv"))
# 
# # Add cleaned scientific name to database
# Ant_biodf_clean <- Ant_biodf %>% mutate(scientificNameClean = clean_doc[,11])
# 
# # Clean species name
# bdc_clean_names(Ant_biodf_clean$species, save_outputs = TRUE)
# 
# # Load the cleaned data (same file path as above, reloaded after the new run)
# clean_doc <- read.csv(here("Output/Check/02_parsed_names.csv"))
# 
# # Add cleaned species name to database
# # (note: the cleaned species names come back capitalised)
# Ant_biodf_clean <- Ant_biodf_clean %>% mutate(speciesClean = clean_doc[,11])
# 
# # Clean genus name
# bdc_clean_names(Ant_biodf_clean$genus, save_outputs = TRUE)
# 
# # Load the cleaned data (same file path as above, reloaded after the new run)
# clean_doc <- read.csv(here("Output/Check/02_parsed_names.csv"))
# 
# # Add cleaned genus name to database
# Ant_biodf_clean <- Ant_biodf_clean %>% mutate(genusClean = clean_doc[,11])
# 
# # Clean family name
# bdc_clean_names(Ant_biodf_clean$family, save_outputs = TRUE)
# 
# # Load the cleaned data (same file path as above, reloaded after the new run)
# clean_doc <- read.csv(here("Output/Check/02_parsed_names.csv"))
# 
# # Add cleaned family name to database
# Ant_biodf_clean <- Ant_biodf_clean %>% mutate(familyClean = clean_doc[,11])
# 
# # Run the scientific name clean once more, last, so that the parsed-names file
# # left on disk holds the scientificName results, ready for the Uncertainty analysis
# bdc_clean_names(Ant_biodf$scientificName, save_outputs = TRUE)

# Save the database, including the four added *Clean columns
# NOTE(review): write.csv() writes row names as an extra first column by default.
# write.csv(Ant_biodf_clean, here("Data/Ant_Terr_Bio_Data_Uncertainty_Terms_Removed_August_2023.csv"))

References

Ribeiro, B.R., Velazco, S.J.E., Guidoni-Martins, K., Tessarolo, G., Jardim, L., Bachman, S.P., Loyola, R., 2022. bdc: A toolkit for standardizing, integrating and cleaning biodiversity data. Methods in Ecology and Evolution 13, 1421–1428. https://doi.org/10.1111/2041-210X.13868

Session information

# Print the R version, platform, locale and package versions used for this run.
sessionInfo()
## R version 4.2.3 (2023-03-15 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_Australia.utf8  LC_CTYPE=English_Australia.utf8   
## [3] LC_MONETARY=English_Australia.utf8 LC_NUMERIC=C                      
## [5] LC_TIME=English_Australia.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] here_1.0.1      bdc_1.1.4       lubridate_1.9.2 forcats_1.0.0  
##  [5] stringr_1.5.0   dplyr_1.1.1     purrr_1.0.1     readr_2.1.4    
##  [9] tidyr_1.3.0     tibble_3.2.1    ggplot2_3.4.2   tidyverse_2.0.0
## 
## loaded via a namespace (and not attached):
##  [1] fs_1.6.1                 sf_1.0-12                oai_0.4.0               
##  [4] httr_1.4.5               rprojroot_2.0.3          rgbif_3.7.7             
##  [7] tools_4.2.3              bslib_0.4.2              utf8_1.2.3              
## [10] rgdal_1.6-5              R6_2.5.1                 DT_0.27                 
## [13] KernSmooth_2.23-20       rgeos_0.6-2              DBI_1.1.3               
## [16] lazyeval_0.2.2           colorspace_2.1-0         raster_3.6-20           
## [19] withr_2.5.0              sp_1.6-0                 tidyselect_1.2.0        
## [22] curl_5.0.0               compiler_4.2.3           cli_3.6.1               
## [25] xml2_1.3.3               stringfish_0.15.7        sass_0.4.5              
## [28] scales_1.2.1             classInt_0.4-9           proxy_0.4-27            
## [31] askpass_1.1              digest_0.6.31            rmarkdown_2.21          
## [34] contentid_0.0.16         pkgconfig_2.0.3          htmltools_0.5.5         
## [37] dbplyr_2.3.2             fastmap_1.1.1            htmlwidgets_1.6.2       
## [40] rlang_1.1.0              rstudioapi_0.14          jquerylib_0.1.4         
## [43] generics_0.1.3           RApiSerialize_0.1.2      jsonlite_1.8.4          
## [46] magrittr_2.0.3           geosphere_1.5-18         Rcpp_1.0.10             
## [49] munsell_0.5.0            fansi_1.0.4              CoordinateCleaner_2.0-20
## [52] rgnparser_0.2.6          lifecycle_1.0.3          terra_1.7-18            
## [55] stringi_1.7.12           whisker_0.4.1            yaml_2.3.7              
## [58] plyr_1.8.8               grid_4.2.3               parallel_4.2.3          
## [61] lattice_0.20-45          hms_1.1.3                sys_3.4.1               
## [64] knitr_1.42               pillar_1.9.0             codetools_0.2-19        
## [67] glue_1.6.2               evaluate_0.20            data.table_1.14.8       
## [70] RcppParallel_5.1.7       vctrs_0.6.1              tzdb_0.3.0              
## [73] foreach_1.5.2            gtable_0.3.3             openssl_2.0.6           
## [76] qs_0.25.5                cachem_1.0.7             xfun_0.38               
## [79] e1071_1.7-13             rnaturalearth_0.3.2      taxadb_0.2.1            
## [82] class_7.3-21             iterators_1.0.14         memoise_2.0.1           
## [85] units_0.8-1              timechange_0.2.0