This code calculates the taxonomic coverage of the SCAR Antarctic Terrestrial Biodiversity Database. Taxonomic coverage is assessed by first assigning taxa to expert-defined functional groups (Priority Threat Management Groups, or ‘PTMs’; Lee et al. 2022), then grouping functional groups into ‘priority’ groups for modelling. These priority groups are also expert-defined (Lee et al. 2022) and separate out groups that (1) are at risk and (2) have a poorly known geographic distribution. Finally, we count the number of records available per group as a proxy for whether that group could be modelled.

Initial setup

Load packages and confirm the project root directory (used by `here()` to resolve file paths).

# Core tidyverse packages (dplyr, tidyr, stringr, purrr, ...)
library(tidyverse)

# Additional packages used by this script
packages <- c("here", "gridExtra")

# Attach with library() rather than require(): library() stops with an error
# when a package is missing, whereas require() only returns FALSE and lets the
# script fail later with a less obvious message
walk(packages, library, character.only = TRUE)

# Report the project root that here() resolves relative paths against
here::here()

Load data

Read in the cleaned biodiversity database (cleaned in file “Terrestrial_Antarctic_Biodiversity_Database_Cleaning.Rmd”).

# Read the cleaned biodiversity records into a data frame; the path is
# resolved relative to the project root by here()
Ant_biodf <- "Data/Ant_Terr_Bio_Data_Uncertainty_Terms_Removed_August_2023.csv" %>%
  here() %>%
  read.csv()

Count the number of records per functional group

# Tally records for every combination of functional group (ID and name),
# cleaned scientific name and dynamicProperties; dynamicProperties is the
# column that flags whether a species is vagrant
PTM_count <- Ant_biodf %>%
  count(PTM_ID, PTM_NAME, scientificNameClean, dynamicProperties)

# Strip leading/trailing white space from the functional group names
Ant_biodf$PTM_NAME <- str_trim(Ant_biodf$PTM_NAME)

# Remove vagrant species or vagrant records of a species.
# Records with a missing dynamicProperties value are kept explicitly: a bare
# `!=` comparison evaluates to NA for them and filter() would silently drop
# those (non-vagrant) records along with the vagrants.
Ant_biodf <- Ant_biodf %>%
  filter(is.na(dynamicProperties) | dynamicProperties != "Vagrant species")

# Remove those records that weren't able to be grouped into a PTM (PTM ID = ?)
# Now separate out PTM IDs into four columns, as some records have up to four
# PTMs that they are listed under (multiple IDs are delimited by "&")
Ant_biodf_PTM <- Ant_biodf %>%
  filter(PTM_ID != "?") %>%
  # fill = "right": records listed under fewer than four PTMs get NA in the
  # unused columns by design, so pad quietly instead of warning about them
  separate(
    PTM_ID,
    into = c("PTM_1", "PTM_2", "PTM_3", "PTM_4"),
    sep = "&",
    fill = "right"
  ) %>%
  # Keep only the PTM columns, selected by name rather than by position so the
  # result does not depend on the column order of the input file
  dplyr::select(all_of(c("PTM_1", "PTM_2", "PTM_3", "PTM_4")))

# Pool the four PTM columns into a single vector, tabulate how often each ID
# occurs, and label the resulting columns
Ant_biodf_PTM_count <- setNames(
  data.frame(table(unlist(Ant_biodf_PTM[c("PTM_1", "PTM_2", "PTM_3", "PTM_4")]))),
  c("PTM_ID", "Freq")
)

# table() returns the IDs as a factor; go through the character labels so the
# numeric values are the IDs themselves rather than the internal level codes
Ant_biodf_PTM_count$PTM_ID <- as.numeric(as.character(Ant_biodf_PTM_count$PTM_ID))

# Several table rows can collapse to the same numeric ID (presumably labels
# that differed only by surrounding white space), so add their counts together
Ant_biodf_PTM_final <- Ant_biodf_PTM_count %>%
  dplyr::group_by(PTM_ID) %>%
  dplyr::summarize(Freq = sum(Freq))

Assign functional groups their priority status

Load a .csv file with the two priority grouping variables (‘intactness’ and ‘wallacean’, i.e. whether the group has a Wallacean shortfall, meaning its distribution is poorly known) assigned to each PTM.

# Read the priority rankings and store the PTM ID as character so it can be
# used as the join key
PTM_priority <- read.csv(here("Data/PTM_rankings.csv")) %>%
  mutate(PTM_ID = as.character(PTM_ID))

# Give the record counts the same character key type
Ant_biodf_PTM_final <- Ant_biodf_PTM_final %>%
  mutate(PTM_ID = as.character(PTM_ID))

# Attach the priority status to each functional group; a full join keeps
# groups that appear in only one of the two tables
freq_intactness_wallacean <- full_join(
  Ant_biodf_PTM_final,
  PTM_priority,
  by = "PTM_ID"
)

# Groups with no intactness / wallacean value are labelled "Not assessed"
freq_intactness_wallacean <- freq_intactness_wallacean %>%
  mutate(
    intactness = replace(intactness, is.na(intactness), "Not assessed"),
    wallacean = replace(wallacean, is.na(wallacean), "Not assessed")
  )

# Remove a small number of vagrant, invasive and non-terrestrial groups.
# A single %in% test replaces the chained `!=` filters (two of which were
# duplicates). Unlike `!=`, %in% never returns NA, so groups without a name
# are no longer dropped as a side effect of the comparison and stay in the
# table with the "Not assessed" status assigned to missing priority values.
freq_intactness_wallacean <- freq_intactness_wallacean %>%
  filter(!name %in% c(
    "vagrants",
    "Vagrants",
    "Invasive",
    "Sea spiders",
    "Marine algae",
    "Marine invertebrates",
    "Marine plankton"
  ))

# Assign groups to frequency levels.
# The bins are contiguous and closed on the right: [.., 100], (100, 500],
# (500, 1000], (1000, ..). With strict comparisons on both sides, a count of
# exactly 100, 500 or 1000 matched no bin and was left with an empty label.
# Label strings are unchanged so downstream code keyed on them keeps working.
# Groups with a missing Freq keep a missing type.
freq_intactness_wallacean <- freq_intactness_wallacean %>%
  mutate(type = case_when(
    Freq <= 100 ~ "< 100",
    Freq <= 500 ~ "> 100 to 500",
    Freq <= 1000 ~ "> 500 to 1000",
    Freq > 1000 ~ "> 1000"
  ))

# Derive the plotting label for each group's priority status. Conditions are
# checked top to bottom and the first match wins:
#   declining and poorly known distribution -> "Both"
#   declining only                          -> "At risk"
#   poorly known distribution only          -> "Distribution poorly known"
#   intactness not assessed                 -> "Not assessed"
#   everything else                         -> "Neither"
freq_intactness_wallacean <- freq_intactness_wallacean %>%
  mutate(Priority = case_when(
    intactness == "Decline" & wallacean == 1 ~ "Both",
    intactness == "Decline" ~ "At risk",
    wallacean == 1 ~ "Distribution poorly known",
    intactness == "Not assessed" ~ "Not assessed",
    TRUE ~ "Neither"
  ))