This code calculates the environmental coverage of the SCAR Antarctic Terrestrial Biodiversity Database. Environmental coverage is defined according to the methods of Mesgaran et al. (2014), who provide an approach to assess data coverage in both univariate (single environmental variables) and multivariate (combinations of environmental variables) space. We define the environment of terrestrial Antarctica according to ten environmental variables (see Appendices for layer descriptions).
Load packages and set working directory.
library(tidyverse)
packages <- c("here", "sf", "terra", "raster", "sp", "viridis", "dsmextra", "gridExtra")
walk(packages, require, character.only = T)
here::here()
We have adapted source code from the dsmextra package (Bouchet et al., 2020) so we call that here.
# Load source code for compute_extrapolation function
source(here("Additional_functions/compute_extrapolation_function.R"))
We resample three variables (solar, slope, roughness) via bilinear interpolation so that all rasters have matching extents.
elev <- rast(here("Data/REMA_Elevation_bm.tif"))
cloud <- rast(here("Data/mean_cloud_bm.tif"))
wind <- rast(here("Data/mean_wind_bm.tif"))
temp <- rast(here("Data/MeanTemp_1415_bm.tif"))
melt <- rast(here("Data/Melt_sum_bm.tif"))
solar <- rast(here("Data/REMA_1km_solarB_RESAMPLED.tif"))
slope <- rast(here("Data/REMA_1km_slopeB_RESAMPLED.tif"))
roughness <- rast(here("Data/REMA_1km_rugosityB_RESAMPLED.tif"))
degree_days <- rast(here("Data/Total_DD_1415_minus5_bm.tif"))
precip <- rast(here("Data/Total_Precip_1415_bm.tif"))
cov.stk <- c(elev, cloud, wind, temp, melt, solar, slope, roughness, degree_days, precip)
Read in the cleaned biodiversity database (cleaned in file “Terrestrial_Antarctic_Biodiversity_Database_Cleaning.Rmd”).
# Load biodiversity data as dataframe
Ant_biodf <- read.csv(here("Data/Ant_Terr_Bio_Data_Uncertainty_Terms_Removed_August_2023.csv"))
# Convert biodiversity df to sf polygon
Ant_bio <- st_as_sf(Ant_biodf,
coords = c("decimalLongitude", "decimalLatitude"),
crs = 4326) #set as WGS 1984
# Project to WGS_1984_Stereographic_South_Pole
Ant_bio <- st_transform(Ant_bio, 3031)
# Define projected coordinate system
crs <- sp::CRS("+proj=stere +lat_0=-90 +lat_ts=-71 +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs")
# Define environmental covariates of interest
covariates <- c("elev", "cloud", "wind", "temp", "melt", "solar", "slope", "roughness", "degree_days", "precip")
# Load the Antarctic coastline
coast <- st_read(here("Data/add_coastline_high_res_polygon_v7_5.shp"), crs = 3031)
# Convert to SpatVector for extract function
coastline_SPVE <- vect(coast)
This function runs separately across each Antarctic Conservation Bioregion (Terauds and Lee 2016).
# Set up the final list
output <- list()
# Set up the final dataframe to save all dissimilarity values across ACBRs
ExDet_df <- data.frame(x = numeric(),
y = numeric(),
ExDet = numeric(),
mic_univariate = numeric(),
mic_combinatorial = numeric(),
mic = numeric(),
source = character(),
ACBR = factor())
# Can't do ACBR 2 because solar radiation doesn't extend to here, run separately below.
ACBRS <- list(1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
# Read in the Antarctic Conservation Bioregion polygons
ACBR_polygons <- st_read(here("Data/ACBRs_v2_2016.shp"), crs = 3031)
# Create a 1km raster with all values as 1
grid <- rast(ext = ext(wind),
resolution = 1000,
vals = 1)
crs(grid) <- "epsg:3031"
# Set up the environmental coverage function
run_env_analysis <- function(z) {
print(paste0("ACBR_number_", z))
# Filter ACBR polygons by target ACBR
ACBR_polygons <- ACBR_polygons %>%
filter(ACBR_ID == z)
# Filter bio points by ACBR
Ant_bio_filter <- st_filter(Ant_bio, ACBR_polygons)
ACBR_SPVE <- vect(ACBR_polygons)
Ant_bio_filter_SPVE <- vect(Ant_bio_filter)
# Make mask for ACBR from grid so it's at the same resolution
ACBR_mask <- terra::mask(grid, ACBR_SPVE)
# Extract enviro. covariate values across whole ACBR
env_cov <- terra::mask(cov.stk, ACBR_mask,
maskvalues = NA, updatevalue = NA)
# Extract enviro. covariate values across areas with bio data
bio_pts <- terra::extract(env_cov,
Ant_bio_filter_SPVE,
method = 'simple',
xy = T) %>%
.[,-1]
colnames(bio_pts) <- c("elev", "cloud", "wind", "temp", "melt", "solar", "slope", "roughness", "degree_days", "precip", "x", "y")
env_cov <- as.data.frame(env_cov,
xy = T,
na.rm = NA)
# Remove the cells from wider ACBR that are covered by bio data so you just retain novel enviro. space
env_cov_tidy <- dplyr::anti_join(env_cov, bio_pts, by = c("x", "y"))
colnames(env_cov_tidy) <- c("x", "y", "elev", "cloud", "wind", "temp", "melt", "solar", "slope","roughness", "degree_days", "precip")
# Compute extrapolation
extrap <- compute_extrapolation_func(samples = bio_pts,
covariate.names = covariates,
prediction.grid = env_cov_tidy,
coordinate.system = crs)
# Add to output list
output <<- c(output, extrap)
# Obtain the output summary
all <- extrap$data$all
# Join back to all environmental areas where there's no bio points, including those areas removed because they had an NA for enviro. data. These areas are renamed as "no_enviro".
output_df <- full_join(all, env_cov_tidy, by = c("x", "y")) %>%
.[, -c(7:16)] %>%
mutate(source = with(., ifelse(is.na(ExDet), "no_enviro", "enviro")))
# Now add in the locations where bio points are
output_df <- full_join(output_df, bio_pts, by = c("x", "y"))
# Remove duplicated bio points from the same raster cell
duplicates <- duplicated(output_df[c("x", "y")], fromLast = T)
output_df <- output_df[!duplicates, ]
# Name the source of cells where bio points are
output_df <- output_df %>%
mutate(source = with(., ifelse(!is.na(degree_days), "bio_pts", source))) %>%
.[, -c(8:17)] %>%
mutate(ACBR = z)
ExDet_df <<- rbind(ExDet_df, output_df)
}
# Run the function across all ACBRs except 2
map(ACBRS, run_env_analysis)
# Extract and save outputs
ACBR1 <- output[1:8]
ACBR3 <- output[9:16]
ACBR4 <- output[17:24]
ACBR5 <- output[25:32]
ACBR6 <- output[33:40]
ACBR7 <- output[41:48]
ACBR8 <- output[49:56]
ACBR9 <- output[57:64]
ACBR10 <- output[65:72]
ACBR11 <- output[73:80]
ACBR12 <- output[81:88]
ACBR13 <- output[89:96]
ACBR14 <- output[97:104]
ACBR15 <- output[105:112]
ACBR16 <- output[113:120]
# Set up the environmental coverage function
run_env_analysis_ACBR2 <- function(z) {
cov.stk2 <- c(cloud, wind, temp, melt, degree_days, precip)
# Filter ACBR polygons by target ACBR
ACBR_polygons <- ACBR_polygons %>%
filter(ACBR_ID == z)
# Filter bio points by ACBR
Ant_bio_filter <- st_filter(Ant_bio, ACBR_polygons)
ACBR_SPVE <- vect(ACBR_polygons)
Ant_bio_filter_SPVE <- vect(Ant_bio_filter)
# Make mask for ACBR from grid so it's at the same resolution
ACBR_mask <- terra::mask(grid, ACBR_SPVE)
# Extract enviro. covariate values across whole ACBR
env_cov <- terra::mask(cov.stk2, ACBR_mask,
maskvalues = NA, updatevalue = NA)
# Extract enviro. covariate values across areas with bio data
bio_pts <- terra::extract(env_cov,
Ant_bio_filter_SPVE,
method = 'simple',
xy = T) %>%
.[,-1]
colnames(bio_pts) <- c("cloud", "wind", "temp", "melt", "degree_days", "precip", "x", "y")
env_cov <- as.data.frame(env_cov,
xy = T,
na.rm = NA)
# Remove the cells from wider ACBR that are covered by bio data so you just retain novel enviro. space
env_cov_tidy <- dplyr::anti_join(env_cov, bio_pts, by = c("x", "y"))
colnames(env_cov_tidy) <- c("x", "y", "cloud", "wind", "temp", "melt", "degree_days", "precip")
# Compute extrapolation
extrap <- compute_extrapolation_func(samples = bio_pts,
covariate.names = covariates2,
prediction.grid = env_cov_tidy,
coordinate.system = crs)
# Add to output
output2 <<- extrap
# Obtain the output summary
all <- output2$data$all
# Join back to all environmental areas where there's no bio points, including those areas removed because they had an NA for enviro. data. These areas are renamed as "no_enviro".
output_df <- full_join(all, env_cov_tidy, by = c("x", "y")) %>%
.[, -c(7:12)] %>%
mutate(source = with(., ifelse(is.na(ExDet), "no_enviro", "enviro")))
# Now add in the locations where bio points are
output_df <- full_join(output_df, bio_pts, by = c("x", "y"))
# Remove duplicated bio points from the same raster cell
duplicates <- duplicated(output_df[c("x", "y")], fromLast = T)
output_df <- output_df[!duplicates, ]
# Name the source of cells where bio points are
output_df <- output_df %>%
mutate(source = with(., ifelse(!is.na(degree_days), "bio_pts", source))) %>%
.[, -c(8:13)] %>%
mutate(ACBR = z)
ExDet_df <<- rbind(ExDet_df, output_df)
}
# Remove solar, elev, roughness, and slope as they do not go up to where ACBR 2 is
covariates2 <- c("cloud", "wind", "temp", "melt","degree_days", "precip")
ACBRS <- list(2)
# Run just ACBR 2
map(ACBRS, run_env_analysis_ACBR2)
# Save output
ACBR2 <- output2
ACBR_results <- list(ACBR1, ACBR2, ACBR3, ACBR4, ACBR5, ACBR6, ACBR7, ACBR8, ACBR9, ACBR10, ACBR11, ACBR12, ACBR13, ACBR14, ACBR15, ACBR16)
ACBR_names <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
The first table summarises the number of grid cells (n) and the percentage of grid cells (p) that display univariate, multivariate, or analogue environmental conditions by ACBR.
The second table summarises the percentage of univariate or multivariate dissimilarity caused by each environmental variable per ACBR.
summary_extrap_df <- data.frame(ACBR = numeric(),
Univariate_n = numeric(),
Univariate_p = numeric(),
Multivariate_n = numeric(),
Multivariate_p = numeric(),
Analogue_n = numeric(),
Analogue_p = numeric())
# Save the extrapolation result
for(i in 1:16) {
temp <- ACBR_results[[i]]$summary$extrapolation %>%
unlist()
temp <- as.data.frame(t(temp))
summary_extrap_df <- summary_extrap_df %>% add_row(ACBR = i,
Univariate_n = temp$univariate.n,
Univariate_p = temp$univariate.p,
Multivariate_n = temp$combinatorial.n,
Multivariate_p = temp$combinatorial.p,
Analogue_n = temp$analogue.n,
Analogue_p = temp$analogue.p)
}
summary_extrap_df %>%
grid.table()
# write.csv(summary_extrap_df, here("Outputs/ACBR_extrapolation_summary.csv"), overwrite = T)
summary_mic_df <- data.frame(ACBR = numeric(),
Univariate_n = numeric(),
Univariate_p = numeric(),
Multivariate_n = numeric(),
Multivariate_p = numeric(),
Analogue_n = numeric(),
Analogue_p = numeric())
# Save the extrapolation result
mic_combined <- data.frame(covariate = character(),
freq = numeric(),
Type = character(),
perc = numeric(),
ACBR = numeric())
for(i in 1:16) {
temp <- ACBR_results[[i]]$summary$mic
if (length(temp) == 1) {
uni <- temp[[1]] %>%
mutate(ACBR = i)
mic_combined <<- rbind(mic_combined, uni)
} else if (length(temp) == 2) {
uni <- temp[[1]]
multi <- temp[[2]]
combined <- rbind(uni, multi) %>%
mutate(ACBR = i)
mic_combined <<- rbind(mic_combined, combined)
} else { print("no extrapolation")}
}
mic_combined %>%
grid.table()
# write.csv(mic_combined, here("Outputs/ACBR_mic_summary.csv"), overwrite = T)
The combined dataframe has a column called “val_source” that is analogue, multivariate, or univariate, or specifies that the cell had no environmental data for at least one layer so was removed from the analysis (“no_enviro”).
# Load the Antarctic coastline for plotting
coastline <- st_read(here("Data/add_coastline_high_res_polygon_v7_5.shp"), crs = 3031)
# Create the final dataframe to save all dissimilarity values across ACBRs
# If there's a value for uni and multi, choose multi
# Otherwise, if uni has a value add that, or multi or analogue, otherwise NA
joined_df_final <- ExDet_df %>%
mutate(val_source = with(., ifelse(!is.na(mic_univariate) & !is.na(mic_combinatorial), "multivariate",
ifelse(!is.na(mic_univariate), "univariate",
ifelse(!is.na(mic_combinatorial), "multivariate",
"analogue"))))) %>%
mutate(val_source = with(., ifelse(source == "bio_pts", "bio_pts",
ifelse(source == "no_enviro", "no_enviro",
val_source))))
# Extract just the columns of interest
joined_df_source <- joined_df_final %>%
dplyr::select(x, y, val_source)
joined_df_final <- joined_df_final %>%
dplyr::select(x, y, ExDet, ACBR, val_source)
joined_df_source$val_source <- as.factor(joined_df_source$val_source)
# Set values to indices
# 1 = analogue, 2 = multivariate, 3 = no_enviro, 4 = univariate
joined_df_source$ExDet_source = as.numeric(joined_df_source$val_source)
# Save as rasters
rasterall_source <- rast(joined_df_source[,c(1:2, 4)], type = "xyz")
rasterall_ExDet <- rast(joined_df_final[,1:3], type = "xyz")
summary_source_df <- count(joined_df_final, ACBR, val_source) %>%
group_by(ACBR) %>%
mutate(record_sum = sum(n)) %>%
mutate(percentage = (n/record_sum)*100)
summary_source_df %>%
grid.table()
# write.csv(summary_source_df, here("Outputs/summary_source_df.csv"), overwrite = T)
# 1 km resolution
# writeRaster(rasterall_source, here("Outputs/ACBR_raster_ALL_source.asc"), overwrite = T)
# writeRaster(rasterall_ExDet, here("Outputs/ACBR_raster_ALL_ExDet.asc"), overwrite = T)
# write.csv(rasterall_source, here("Outputs/ACBR_source_ALL_df.csv"), overwrite = T)
# write.csv(rasterall_ExDet, here("Outputs/ACBR_ExDet_ALL_df.csv"), overwrite = T)
# 10 km resolution for plotting
coastline_SPVE <- vect(coastline)
# Create a 10 km raster
grid <- rast(ext = ext(coastline_SPVE),
resolution = 10000,
vals = NA)
crs(grid) <- "epsg:3031"
rasterall_10km <- resample(rasterall_source, grid, method = "bilinear")
# writeRaster(rasterall_source, here("Outputs/ACBR_raster_ALL_source_10km.asc"), overwrite = T)
Bouchet, P.J., Miller, D.L., Roberts, J.J., Mannocci, L., Harris, C.M., Thomas, L., 2020. dsmextra: Extrapolation assessment tools for density surface models. Methods in Ecology and Evolution 11, 1464–1469. https://doi.org/10.1111/2041-210X.13469
Mesgaran, M.B., Cousens, R.D., Webber, B.L., 2014. Here be dragons: a tool for quantifying novelty due to covariate range and correlation change when projecting species distribution models. Diversity and Distributions 20, 1147–1159. https://doi.org/10.1111/ddi.12209
Terauds, A., Lee, J.R., 2016. Antarctic biogeography revisited: updating the Antarctic Conservation Biogeographic Regions. Diversity and Distributions 22, 836–840. https://doi.org/10.1111/ddi.12453
sessionInfo()
## R version 4.2.3 (2023-03-15 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_Australia.utf8 LC_CTYPE=English_Australia.utf8
## [3] LC_MONETARY=English_Australia.utf8 LC_NUMERIC=C
## [5] LC_TIME=English_Australia.utf8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] gridExtra_2.3 dsmextra_1.1.5 viridis_0.6.2 viridisLite_0.4.1
## [5] raster_3.6-20 sp_1.6-0 terra_1.7-18 sf_1.0-12
## [9] here_1.0.1 lubridate_1.9.2 forcats_1.0.0 stringr_1.5.0
## [13] dplyr_1.1.1 purrr_1.0.1 readr_2.1.4 tidyr_1.3.0
## [17] tibble_3.2.1 ggplot2_3.4.2 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.10 lattice_0.20-45 class_7.3-21 rprojroot_2.0.3
## [5] digest_0.6.31 utf8_1.2.3 R6_2.5.1 evaluate_0.20
## [9] e1071_1.7-13 highr_0.10 pillar_1.9.0 rlang_1.1.0
## [13] rstudioapi_0.14 jquerylib_0.1.4 rmarkdown_2.21 rgdal_1.6-5
## [17] htmlwidgets_1.6.2 munsell_0.5.0 proxy_0.4-27 compiler_4.2.3
## [21] xfun_0.38 pkgconfig_2.0.3 htmltools_0.5.5 tidyselect_1.2.0
## [25] lpSolve_5.6.18 codetools_0.2-19 fansi_1.0.4 tzdb_0.3.0
## [29] withr_2.5.0 grid_4.2.3 jsonlite_1.8.4 gtable_0.3.3
## [33] lifecycle_1.0.3 DBI_1.1.3 magrittr_2.0.3 units_0.8-1
## [37] scales_1.2.1 KernSmooth_2.23-20 cli_3.6.1 stringi_1.7.12
## [41] cachem_1.0.7 leaflet_2.1.2 bslib_0.4.2 generics_0.1.3
## [45] vctrs_0.6.1 pbmcapply_1.5.1 tools_4.2.3 glue_1.6.2
## [49] hms_1.1.3 crosstalk_1.2.0 parallel_4.2.3 fastmap_1.1.1
## [53] yaml_2.3.7 timechange_0.2.0 colorspace_2.1-0 classInt_0.4-9
## [57] knitr_1.42 sass_0.4.5