This code calculates the environmental coverage of the SCAR Antarctic Terrestrial Biodiversity Database. Environmental coverage is defined according to the methods of Mesgaran et al. (2014), who provide an approach to assess data coverage in both univariate (single environmental variables) and multivariate (combinations of environmental variables) space. We define the environment of terrestrial Antarctica according to ten environmental variables (see Appendices for layer descriptions).

Initial setup

Load packages and set working directory.

library(tidyverse)

packages <- c("here", "sf", "terra", "raster", "sp", "viridis", "dsmextra", "gridExtra")

walk(packages, require, character.only = T)

here::here()

Load source code

We have adapted source code from the dsmextra package (Bouchet et al., 2020) so we call that here.

# Load source code for compute_extrapolation function

source(here("Additional_functions/compute_extrapolation_function.R"))

Read in and resample environmental variables

We resample three variables (solar, slope, roughness) via bilinear interpolation so that all rasters have matching extents.

elev <- rast(here("Data/REMA_Elevation_bm.tif")) 
cloud <- rast(here("Data/mean_cloud_bm.tif"))
wind <- rast(here("Data/mean_wind_bm.tif")) 
temp <- rast(here("Data/MeanTemp_1415_bm.tif")) 
melt <- rast(here("Data/Melt_sum_bm.tif"))
solar <- rast(here("Data/REMA_1km_solarB_RESAMPLED.tif")) 
slope <- rast(here("Data/REMA_1km_slopeB_RESAMPLED.tif")) 
roughness <- rast(here("Data/REMA_1km_rugosityB_RESAMPLED.tif"))
degree_days <- rast(here("Data/Total_DD_1415_minus5_bm.tif")) 
precip <- rast(here("Data/Total_Precip_1415_bm.tif")) 

cov.stk <- c(elev, cloud, wind, temp, melt, solar, slope, roughness, degree_days, precip)

Load biodiversity data and environmental variables

Read in the cleaned biodiversity database (cleaned in file “Terrestrial_Antarctic_Biodiversity_Database_Cleaning.Rmd”).

# Load biodiversity data as dataframe
Ant_biodf <- read.csv(here("Data/Ant_Terr_Bio_Data_Uncertainty_Terms_Removed_August_2023.csv"))

# Convert biodiversity df to sf polygon
Ant_bio <- st_as_sf(Ant_biodf, 
                    coords = c("decimalLongitude", "decimalLatitude"), 
                    crs = 4326) #set as WGS 1984

# Project to WGS_1984_Stereographic_South_Pole
Ant_bio <- st_transform(Ant_bio, 3031) 

# Define projected coordinate system
crs <- sp::CRS("+proj=stere +lat_0=-90 +lat_ts=-71 +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs")

# Define environmental covariates of interest
covariates <- c("elev", "cloud", "wind", "temp", "melt", "solar", "slope", "roughness", "degree_days", "precip")

# Load the Antarctic coastline 
coast <- st_read(here("Data/add_coastline_high_res_polygon_v7_5.shp"), crs = 3031)

# Convert to SpatVector for extract function
coastline_SPVE <- vect(coast)

Set up environmental coverage function

This function runs separately across each Antarctic Conservation Bioregion (Terauds and Lee 2016).

# Set up the final list
output <- list()

# Set up the final dataframe to save all dissimilarity values across ACBRs
ExDet_df <- data.frame(x = numeric(), 
                       y = numeric(), 
                       ExDet = numeric(), 
                       mic_univariate = numeric(), 
                       mic_combinatorial = numeric(),
                       mic = numeric(),
                       source = character(),
                       ACBR = factor())
                 

# Can't do ACBR 2 because solar radiation doesn't extend to here, run separately below.
ACBRS <- list(1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)

# Read in the Antarctic Conservation Bioregion polygons
ACBR_polygons <- st_read(here("Data/ACBRs_v2_2016.shp"), crs = 3031)

# Create a 1km raster with all values as 1
grid <- rast(ext = ext(wind),
                   resolution = 1000, 
                   vals = 1) 

crs(grid) <- "epsg:3031"

# Set up the environmental coverage function
run_env_analysis <- function(z) {
  
  print(paste0("ACBR_number_", z))
  
  # Filter ACBR polygons by target ACBR
  ACBR_polygons <- ACBR_polygons %>% 
    filter(ACBR_ID == z)
  
  # Filter bio points by ACBR
  Ant_bio_filter <- st_filter(Ant_bio, ACBR_polygons)
  
  ACBR_SPVE <- vect(ACBR_polygons)
  
  Ant_bio_filter_SPVE <- vect(Ant_bio_filter)
  
  # Make mask for ACBR from grid so it's at the same resolution
  ACBR_mask <- terra::mask(grid, ACBR_SPVE)
  
  # Extract enviro. covariate values across whole ACBR
  env_cov <- terra::mask(cov.stk, ACBR_mask,
                          maskvalues = NA, updatevalue = NA)
  
  
  # Extract enviro. covariate values across areas with bio data
  bio_pts <- terra::extract(env_cov,
                             Ant_bio_filter_SPVE,
                             method = 'simple',
                             xy = T) %>% 
    .[,-1]
  
  colnames(bio_pts) <- c("elev", "cloud", "wind", "temp", "melt", "solar", "slope", "roughness", "degree_days", "precip", "x", "y")
  
  env_cov <- as.data.frame(env_cov, 
                            xy = T, 
                            na.rm = NA)
  
  # Remove the cells from wider ACBR that are covered by bio data so you just retain novel enviro. space
  env_cov_tidy <- dplyr::anti_join(env_cov, bio_pts, by = c("x", "y"))
  
  colnames(env_cov_tidy) <- c("x", "y", "elev", "cloud", "wind", "temp", "melt", "solar", "slope","roughness", "degree_days", "precip")
  
  # Compute extrapolation 
  extrap <- compute_extrapolation_func(samples = bio_pts,
                                  covariate.names = covariates,
                                  prediction.grid = env_cov_tidy,
                                  coordinate.system = crs)
  
  
  # Add to output list
  output <<- c(output, extrap)
  
  # Obtain the output summary
  all <- extrap$data$all
  
  # Join back to all environmental areas where there's no bio points, including those areas removed because they had an NA for enviro. data. These areas are renamed as "no_enviro".
  output_df <- full_join(all, env_cov_tidy, by = c("x", "y")) %>% 
    .[, -c(7:16)] %>% 
    mutate(source = with(., ifelse(is.na(ExDet), "no_enviro", "enviro")))
  
  # Now add in the locations where bio points are
  output_df <- full_join(output_df, bio_pts, by = c("x", "y")) 
  
  # Remove duplicated bio points from the same raster cell
  duplicates <- duplicated(output_df[c("x", "y")], fromLast = T)
  output_df <- output_df[!duplicates, ]
  
  # Name the source of cells where bio points are
  output_df <- output_df %>% 
    mutate(source = with(., ifelse(!is.na(degree_days), "bio_pts", source))) %>% 
    .[, -c(8:17)] %>% 
    mutate(ACBR = z)
  
  ExDet_df <<- rbind(ExDet_df, output_df)
  
}

Run the environmental coverage function

# Run the function across all ACBRs except 2
map(ACBRS, run_env_analysis)

Summarise outputs

# Extract and save outputs
ACBR1 <- output[1:8]
ACBR3 <- output[9:16]
ACBR4 <- output[17:24]
ACBR5 <- output[25:32]
ACBR6 <- output[33:40]
ACBR7 <- output[41:48]
ACBR8 <- output[49:56]
ACBR9 <- output[57:64]
ACBR10 <- output[65:72]
ACBR11 <- output[73:80]
ACBR12 <- output[81:88]
ACBR13 <- output[89:96]
ACBR14 <- output[97:104]
ACBR15 <- output[105:112]
ACBR16 <- output[113:120]

Set up and run the environmental coverage analysis for just ACBR 2

# Set up the environmental coverage function
run_env_analysis_ACBR2 <- function(z) {
  
  cov.stk2 <- c(cloud, wind, temp, melt, degree_days, precip)
  
  # Filter ACBR polygons by target ACBR
  ACBR_polygons <- ACBR_polygons %>% 
    filter(ACBR_ID == z)
  
  # Filter bio points by ACBR
  Ant_bio_filter <- st_filter(Ant_bio, ACBR_polygons)
  
  ACBR_SPVE <- vect(ACBR_polygons)
  
 
  Ant_bio_filter_SPVE <- vect(Ant_bio_filter)
  
  # Make mask for ACBR from grid so it's at the same resolution
  ACBR_mask <- terra::mask(grid, ACBR_SPVE)
  
  # Extract enviro. covariate values across whole ACBR
  env_cov <- terra::mask(cov.stk2, ACBR_mask,
                          maskvalues = NA, updatevalue = NA)
  
  # Extract enviro. covariate values across areas with bio data
  bio_pts <- terra::extract(env_cov,
                             Ant_bio_filter_SPVE,
                             method = 'simple',
                             xy = T) %>% 
    .[,-1]
  
  
  colnames(bio_pts) <- c("cloud", "wind", "temp", "melt", "degree_days", "precip", "x", "y")
  
  env_cov <- as.data.frame(env_cov, 
                            xy = T, 
                            na.rm = NA)
  
  # Remove the cells from wider ACBR that are covered by bio data so you just retain novel enviro. space
  env_cov_tidy <- dplyr::anti_join(env_cov, bio_pts, by = c("x", "y"))
  
  colnames(env_cov_tidy) <- c("x", "y", "cloud", "wind", "temp", "melt", "degree_days", "precip")
  
  # Compute extrapolation 
  extrap <- compute_extrapolation_func(samples = bio_pts,
                                  covariate.names = covariates2,
                                  prediction.grid = env_cov_tidy,
                                  coordinate.system = crs)
  
  # Add to output
  output2 <<- extrap
  
  # Obtain the output summary
  all <- output2$data$all
  
  # Join back to all environmental areas where there's no bio points, including those areas removed because they had an NA for enviro. data. These areas are renamed as "no_enviro".
  output_df <- full_join(all, env_cov_tidy, by = c("x", "y")) %>% 
    .[, -c(7:12)] %>% 
    mutate(source = with(., ifelse(is.na(ExDet), "no_enviro", "enviro")))
  
  # Now add in the locations where bio points are
  output_df <- full_join(output_df, bio_pts, by = c("x", "y")) 
  
  # Remove duplicated bio points from the same raster cell
  duplicates <- duplicated(output_df[c("x", "y")], fromLast = T)
  output_df <- output_df[!duplicates, ]
  
   # Name the source of cells where bio points are
  output_df <- output_df %>% 
    mutate(source = with(., ifelse(!is.na(degree_days), "bio_pts", source))) %>% 
    .[, -c(8:13)] %>% 
    mutate(ACBR = z)
  
  ExDet_df <<- rbind(ExDet_df, output_df)
  
}
# Remove solar, elev, roughness, and slope as they do not go up to where ACBR 2 is
covariates2 <- c("cloud", "wind", "temp", "melt","degree_days", "precip")

ACBRS <- list(2)

# Run just ACBR 2
map(ACBRS, run_env_analysis_ACBR2)

# Save output
ACBR2 <- output2

Save results for all ACBRs

ACBR_results <- list(ACBR1, ACBR2, ACBR3, ACBR4, ACBR5, ACBR6, ACBR7, ACBR8, ACBR9, ACBR10, ACBR11, ACBR12, ACBR13, ACBR14, ACBR15, ACBR16)

ACBR_names <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)

Generate combined rasters for analogue, univariate, and multivariate

The combined dataframe has a column called “val_source” that is analogue, multivariate, or univariate, or specifies that the cell had no environmental data for at least one layer so was removed from the analysis (“no_enviro”).

# Load the Antarctic coastline for plotting
coastline <- st_read(here("Data/add_coastline_high_res_polygon_v7_5.shp"), crs = 3031)

# Create the final dataframe to save all dissimilarity values across ACBRs
# If there's a value for uni and multi, choose multi
# Otherwise, if uni has a value add that, or multi or analogue, otherwise NA
joined_df_final <- ExDet_df %>% 
  mutate(val_source = with(., ifelse(!is.na(mic_univariate) & !is.na(mic_combinatorial), "multivariate", 
                                            ifelse(!is.na(mic_univariate), "univariate",
                                                   ifelse(!is.na(mic_combinatorial), "multivariate", 
                                                          "analogue"))))) %>% 
  mutate(val_source = with(., ifelse(source == "bio_pts", "bio_pts", 
                                     ifelse(source == "no_enviro", "no_enviro", 
                                            val_source))))

# Extract just the columns of interest
joined_df_source <- joined_df_final %>% 
  dplyr::select(x, y, val_source) 

joined_df_final <- joined_df_final %>% 
  dplyr::select(x, y, ExDet, ACBR, val_source) 
  
joined_df_source$val_source <- as.factor(joined_df_source$val_source)

# Set values to indices
# 1 = analogue, 2 = multivariate, 3 = no_enviro, 4 = univariate
joined_df_source$ExDet_source = as.numeric(joined_df_source$val_source)

# Save as rasters
rasterall_source <- rast(joined_df_source[,c(1:2, 4)], type = "xyz")

rasterall_ExDet <- rast(joined_df_final[,1:3], type = "xyz")

Summarise the percentage coverage of analogue, univariate and multivariate conditions and cells with biological records

summary_source_df <- count(joined_df_final, ACBR, val_source) %>% 
  group_by(ACBR) %>% 
  mutate(record_sum = sum(n)) %>% 
  mutate(percentage = (n/record_sum)*100)

summary_source_df %>% 
  grid.table()

# write.csv(summary_source_df, here("Outputs/summary_source_df.csv"), overwrite = T)

Save the rasters at 1km resolution and 10km for visualisation

# 1 km resolution

# writeRaster(rasterall_source, here("Outputs/ACBR_raster_ALL_source.asc"), overwrite = T)

# writeRaster(rasterall_ExDet, here("Outputs/ACBR_raster_ALL_ExDet.asc"), overwrite = T)

# write.csv(rasterall_source, here("Outputs/ACBR_source_ALL_df.csv"), overwrite = T)
# write.csv(rasterall_ExDet, here("Outputs/ACBR_ExDet_ALL_df.csv"), overwrite = T)


# 10 km resolution for plotting
coastline_SPVE <- vect(coastline)

 # Create a 10 km raster
 grid <- rast(ext = ext(coastline_SPVE),
              resolution = 10000,
              vals = NA)

crs(grid) <- "epsg:3031"

rasterall_10km <- resample(rasterall_source, grid, method = "bilinear")

# writeRaster(rasterall_source, here("Outputs/ACBR_raster_ALL_source_10km.asc"), overwrite = T)

References

Bouchet, P.J., Miller, D.L., Roberts, J.J., Mannocci, L., Harris, C.M., Thomas, L., 2020. dsmextra: Extrapolation assessment tools for density surface models. Methods in Ecology and Evolution 11, 1464–1469. https://doi.org/10.1111/2041-210X.13469

Mesgaran, M.B., Cousens, R.D., Webber, B.L., 2014. Here be dragons: a tool for quantifying novelty due to covariate range and correlation change when projecting species distribution models. Diversity and Distributions 20, 1147–1159. https://doi.org/10.1111/ddi.12209

Terauds, A., Lee, J.R., 2016. Antarctic biogeography revisited: updating the Antarctic Conservation Biogeographic Regions. Diversity and Distributions 22, 836–840. https://doi.org/10.1111/ddi.12453

Session information

sessionInfo()
## R version 4.2.3 (2023-03-15 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_Australia.utf8  LC_CTYPE=English_Australia.utf8   
## [3] LC_MONETARY=English_Australia.utf8 LC_NUMERIC=C                      
## [5] LC_TIME=English_Australia.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] gridExtra_2.3     dsmextra_1.1.5    viridis_0.6.2     viridisLite_0.4.1
##  [5] raster_3.6-20     sp_1.6-0          terra_1.7-18      sf_1.0-12        
##  [9] here_1.0.1        lubridate_1.9.2   forcats_1.0.0     stringr_1.5.0    
## [13] dplyr_1.1.1       purrr_1.0.1       readr_2.1.4       tidyr_1.3.0      
## [17] tibble_3.2.1      ggplot2_3.4.2     tidyverse_2.0.0  
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.10        lattice_0.20-45    class_7.3-21       rprojroot_2.0.3   
##  [5] digest_0.6.31      utf8_1.2.3         R6_2.5.1           evaluate_0.20     
##  [9] e1071_1.7-13       highr_0.10         pillar_1.9.0       rlang_1.1.0       
## [13] rstudioapi_0.14    jquerylib_0.1.4    rmarkdown_2.21     rgdal_1.6-5       
## [17] htmlwidgets_1.6.2  munsell_0.5.0      proxy_0.4-27       compiler_4.2.3    
## [21] xfun_0.38          pkgconfig_2.0.3    htmltools_0.5.5    tidyselect_1.2.0  
## [25] lpSolve_5.6.18     codetools_0.2-19   fansi_1.0.4        tzdb_0.3.0        
## [29] withr_2.5.0        grid_4.2.3         jsonlite_1.8.4     gtable_0.3.3      
## [33] lifecycle_1.0.3    DBI_1.1.3          magrittr_2.0.3     units_0.8-1       
## [37] scales_1.2.1       KernSmooth_2.23-20 cli_3.6.1          stringi_1.7.12    
## [41] cachem_1.0.7       leaflet_2.1.2      bslib_0.4.2        generics_0.1.3    
## [45] vctrs_0.6.1        pbmcapply_1.5.1    tools_4.2.3        glue_1.6.2        
## [49] hms_1.1.3          crosstalk_1.2.0    parallel_4.2.3     fastmap_1.1.1     
## [53] yaml_2.3.7         timechange_0.2.0   colorspace_2.1-0   classInt_0.4-9    
## [57] knitr_1.42         sass_0.4.5