This code assesses the temporal coverage (date of most recent record per unit area) of the SCAR Antarctic Terrestrial Biodiversity Database. It also assesses the spatial autocorrelation of this coverage by calculating Moran’s I.

Initial setup

Load packages and set working directory.

library(tidyverse)

packages <- c("sf", "terra", "here", "viridis", "lubridate", "spdep", "ggpubr")

walk(packages, require, character.only = T)

here::here()

Load and prepare data

Read in the cleaned biodiversity database (cleaned in file “Terrestrial_Antarctic_Biodiversity_Database_Cleaning.Rmd”), the cleaned ice-free area vector (cleaned in file “ADD_ice_free_area_cleaning.Rmd”) and the coastline (Antarctic Digital Database coastline v. 7.5).

# Load biodiversity data as dataframe and save
Ant_biodf <- read.csv(here("Data/SCAR_Ant_Terr_Bio_DataBase_MASTER_20-10-2023.csv"))

# Load cleaned ice-free area vector 
ice_free <- st_read(here("Data/AAD_ice_free_vector_TIDIED.shp"), crs = 3031)

# Also convert to SpatVector for extract function
ice_free_SPVE <- vect(ice_free)

# Load the Antarctic coastline for plotting
coast <- st_read(here("Data/add_coastline_high_res_polygon_v7_5.shp"), crs = 3031)

# Convert to SpatVector for extract function
coastline_SPVE <- vect(coast)

# List target grid resolutions (1km, 20km, 100km, 200km)
res <- list(20000, 100000, 200000) 
# res <- list(1000, 20000, 100000, 200000) # 1km is slow to run and was run on QUT HPC. The following code is just for the 20km, 100km, and 200km resolutions.

# Index list
i <- list(1,2,3)
# i <- list(1,2,3,4)

# Check if there are any datasets with the Date but not the year, there are none
test <- !is.na(Ant_biodf[,9]) & is.na(Ant_biodf[,10])
sum(test)

# Find rows that only contain Date information and make Date a datetime variable
Ant_bio_date <- Ant_biodf %>% 
  filter(!is.na(Date)) %>% 
  mutate(Date = lubridate::ymd(Date))

# Remove rows that contain no month information if there is no Date
# Adjust the day so that these records are set to the 1st of the month
Ant_bio_month_year <- Ant_biodf %>% 
  filter(is.na(Date)) %>% 
  filter(!is.na(year)) %>% 
  filter(!is.na(month)) %>% 
  mutate(day = 1) %>% 
  mutate(Date = lubridate::make_date(year = as.integer(year),
                                     month = month,
                                     day = day))

# Now find rows with no month information, only year. Set these to the 1st of the 1st
Ant_bio_year_only <- Ant_biodf %>% 
  filter(is.na(Date)) %>% 
  filter(!is.na(year)) %>% 
  filter(is.na(month)) %>% 
  mutate(month = 1) %>% 
  mutate(day = 1) %>% 
  mutate(Date = lubridate::make_date(year = as.integer(year),
                                     month = month,
                                     day = day))


# Create all dates as datetime variable
Ant_bio_date_month_year <- rbind(Ant_bio_date, Ant_bio_month_year, Ant_bio_year_only) %>% 
  mutate(recordID_2 = 1:nrow(.)) # Give the records a new ID

Count and plot the number of records over time

# Count the number of records per Date
count <- Ant_bio_date_month_year %>% 
  count(Date) %>% 
  as.data.frame() 

 count %>% 
  ggplot(aes(x = Date, y = n)) +
  geom_line() +
  geom_smooth(method = "lm")+
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'

Function to calculate the time since last record across resolutions

# Set up a list to save temporal rasters
temporal_raster_list <- vector(mode = "list", length = 4)

# Save an empty stack for saving output rasters
temporal_rast_stack <- c()

# Convert biodiversity dataframe to sf polygon
Ant_bio <- st_as_sf(Ant_bio_date_month_year, 
                    coords = c("decimalLongitude", "decimalLatitude"), 
                    crs = 4326) # set as WGS 1984

# Project to WGS_1984_Stereographic_South_Pole
Ant_bio <- st_transform(Ant_bio, 3031) 

# Convert to a SpatVector for extract function below
Ant_bio_SPVE <- vect(Ant_bio)

temporal_func <- function(res, i) {
  
  # Time since last record  -----------------------------------------------
  
  print(paste0("res = ", res))
  
  # Create a 1km raster with all values as 1
  grid <- rast(ext = ext(coastline_SPVE),
               resolution = res,
               vals = 1) 
  
  crs(grid) <- "epsg:3031"
  
  # Add a one cell buffer to make sure all records are covered
  grid <- terra::extend(grid, 1, fill = 1)
  
  # Make raster and give all raster values a unique ID
  gridUNIQUE <- rast(ext = ext(grid), 
                     resolution = res, 
                     vals = 1:ncell(grid)) 
  
  crs(gridUNIQUE) <- "epsg:3031"
  
  # Find the ice-free grid cell ID each bio point lies in with a unique value
  # ID is the ID of the ant bio observation, lyr.1 is the unique ID of the cell
  extractUNIQUE <- terra::extract(gridUNIQUE, Ant_bio_SPVE)
  
  # Change so column names match for join (ID to recordID_2, lyr.1 to cellID)
  colnames(extractUNIQUE) <- c("recordID_2", "cellID")
  
  # Join by id, adding in the cellID column
  Ant_bio_gridID <- full_join(extractUNIQUE, Ant_bio, by = "recordID_2")
  
  # Calculate the most recent record per cell
  # Results in a subset of cells with the most recent date
  Ant_bio_recent <- Ant_bio_gridID[with(Ant_bio_gridID, order (cellID, Date)), ]
  Ant_bio_recent <- Ant_bio_recent[cumsum(table(Ant_bio_recent$cellID)), ] %>% 
    select(cellID, Date)
  
  # Make gridUNIQUE raster a dataframe
  gridUNIQUEdf <- as.data.frame(gridUNIQUE, xy = TRUE) 
  colnames(gridUNIQUEdf) <- c("x", "y", "cellID")
  
  # Full join retains all values, join the grid dataframe and  the count values by the cell unique ID
  temporal_rast <- full_join(gridUNIQUEdf, Ant_bio_recent, by = "cellID") %>% 
    .[, -3] 
  
  # Get the current date
  today <- today()
  
  # Calculate the time since the record in days
  int <- interval(temporal_rast$Date, today)
  
  # Convert days to years and add to raster
  yr_diff <- lubridate::time_length(int, unit = "year")
  
  temporal_rast <- temporal_rast %>% 
    mutate(time_from_today = yr_diff) %>% 
    .[, -3]
  
  # Convert to terra SpatRaster
  temporal_rast <- temporal_rast %>% 
    rast(.)
  
  crs(temporal_rast) <- "epsg:3031"
  
  # Mask out non ice-free areas ---------------------------------------------
  
  # Set the grid cells that are NOT ice-free areas to 0
  ice_free_mask <- mask(grid, ice_free_SPVE, 
                        updatevalue = 0) 
  
  # Mask the final raster of record counts by ice_free polygon, non ice-free areas become NA
  temporal_rast_masked <- mask(temporal_rast, ice_free_mask,  
                               maskvalues = 0, updatevalue = NA)
  
  
# Save raster to raster stack  -----------------------------------------------------

  temporal_rast_stack <<- c(temporal_rast_stack, temporal_rast_masked)
  
  temporal_raster_list[[i]] <<- temporal_rast_masked
}

# Run the temporal coverage function
map2(res, i, temporal_func)

Optional - saving and plotting temporal coverage rasters.

# writeRaster(temporal_raster_list[[1]], here("Outputs/Temporal_coverage_1km_rast.asc"), overwrite = T)
# 
# writeRaster(temporal_raster_list[[1]], here("Outputs/Temporal_coverage_20km_rast.asc"), overwrite = T)
# 
# writeRaster(temporal_raster_list[[2]], here("Outputs/Temporal_coverage_100km_rast.asc"), overwrite = T)
# 
# writeRaster(temporal_raster_list[[3]], here("Outputs/Temporal_coverage_200km_rast.asc"), overwrite = T)


# Save each res as a dataframe

# temporal_1kmdf <- as.data.frame(temporal_rast_stack[[1]], xy = TRUE)

temporal_20kmdf <- as.data.frame(temporal_rast_stack[[1]], xy = TRUE)

temporal_100kmdf <- as.data.frame(temporal_rast_stack[[2]], xy = TRUE)

temporal_200kmdf <- as.data.frame(temporal_rast_stack[[3]], xy = TRUE)

# Set dataframe zeros to NA

# temporal_1kmdf$n_nas <- ifelse(temporal_1kmdf$time_from_today == 0, NA, temporal_1kmdf$time_from_today)

temporal_20kmdf$n_nas <- ifelse(temporal_20kmdf$time_from_today == 0, NA, temporal_20kmdf$time_from_today)

temporal_100kmdf$n_nas <- ifelse(temporal_100kmdf$time_from_today == 0, NA, temporal_100kmdf$time_from_today)

temporal_200kmdf$n_nas <- ifelse(temporal_200kmdf$time_from_today == 0, NA, temporal_200kmdf$time_from_today)

# Save plots
# plot1km <- ggplot() +
#   geom_sf(data = coast, fill = "grey94", alpha = 0.5, size = 0.00001)  +
#   geom_raster(data = temporal_1kmdf, aes(x, y, fill = time_from_today), alpha = 0.87) +
#   scale_fill_viridis(na.value = "black") +
#   labs(x = "Longitude", y = "Latitude", fill = str_wrap("Time since last record (years)", 17)) +
#   geom_area(inherit.aes = F, mapping = aes(y = 1, x = 1, color = "Zero records"), data = data.frame()) +
#   scale_colour_manual(name = element_blank(), values="#FFFFCC") +
#   guides(fill = guide_colourbar(ticks = F)) +
#   theme_void()
# 
# plot1km <- plot1km + annotate("text", x = Inf, y = Inf, label = "1 km resolution", vjust = 1, hjust = 1)

plot20km <- ggplot() +
  geom_sf(data = coast, fill = "grey94", alpha = 0.5, size = 0.00001)  +
  geom_raster(data = temporal_20kmdf, aes(x, y, fill = time_from_today), alpha = 0.87) +
  scale_fill_viridis(na.value = "black") +
  labs(x = "Longitude", y = "Latitude", fill = str_wrap("Time since last record (years)", 17)) +
  geom_area(inherit.aes = F, mapping = aes(y = 1, x = 1, color = "Zero records"), data = data.frame()) +
  scale_colour_manual(name = element_blank(), values="#FFFFCC") +
  guides(fill = guide_colourbar(ticks = F)) +
  theme_void()

plot20km <- plot20km + annotate("text", x = Inf, y = Inf, label = "20 km resolution", vjust = 1, hjust = 1)

plot100km <- ggplot() +
  geom_sf(data = coast, fill = "grey94", alpha = 0.5, size = 0.00001)  +
  geom_raster(data = temporal_100kmdf, aes(x, y, fill = time_from_today), alpha = 0.87) +
  scale_fill_viridis(na.value = "black") +
  labs(x = "Longitude", y = "Latitude", fill = str_wrap("Time since last record (years)", 17)) +
  geom_area(inherit.aes = F, mapping = aes(y = 1, x = 1, color = "Zero records"), data = data.frame()) +
  scale_colour_manual(name = element_blank(), values="#FFFFCC") +
  guides(fill = guide_colourbar(ticks = F)) +
  theme_void()

plot100km <- plot100km + annotate("text", x = Inf, y = Inf, label = "100 km resolution", vjust = 1, hjust = 1)

plot200km <- ggplot() +
  geom_sf(data = coast, fill = "grey94", alpha = 0.5, size = 0.00001)  +
  geom_raster(data = temporal_200kmdf, aes(x, y, fill = time_from_today), alpha = 0.87) +
  scale_fill_viridis(na.value = "black") +
  labs(x = "Longitude", y = "Latitude", fill = str_wrap("Time since last record (years)", 17)) +
  geom_area(inherit.aes = F, mapping = aes(y = 1, x = 1, color = "Zero records"), data = data.frame()) +
  scale_colour_manual(name = element_blank(), values="#FFFFCC") +
  guides(fill = guide_colourbar(ticks = F)) +
  theme_void()


plot200km <- plot200km + annotate("text", x = Inf, y = Inf, label = "200 km resolution", vjust = 1, hjust = 1)

# Arrange all plots into a figure
# temporal_coverage_plot <- ggarrange(plot1km + rremove("ylab") + rremove("xlab"), plot20km + rremove("ylab") + rremove("xlab"), plot100km + rremove("ylab") + rremove("xlab"), plot200km + rremove("ylab") + rremove("xlab"), nrow = 2, ncol = 2, align = "v", common.legend = TRUE, legend = "right") 

temporal_coverage_plot <- ggarrange(plot20km + rremove("ylab") + rremove("xlab"), plot100km + rremove("ylab") + rremove("xlab"), plot200km + rremove("ylab") + rremove("xlab"), nrow = 2, ncol = 2, align = "v", common.legend = TRUE, legend = "right")

print(temporal_coverage_plot)

Spatial autocorrelation of coverage

Using Moran’s I from the package spdep, calculate the spatial autocorrelation of the grid cell coverage. First we set up the neighbourhood distance to define cells that are considered ‘adjacent’. We chose neighbourhood distances for our 1km, 20km, 100km, and 200km resolutions to be 1.5km, 30km, 150km, and 300km. Neighbours are therefore adjacent by the Queen’s adjacency rule.

# Set up the dataframe for saving
MoransI <- data.frame(Resolution = numeric(), MoransI = numeric())

MoransI_func <- function(res, i) {
  
  # Calculating Moran's I  -------------------------------------------------
  
  # Calculate the neighbourhood distance as 1.5 x resolution
  neighbour_dist <- res*1.5
  
  update_rast_xy <- temporal_raster_list[[i]] %>% 
    as.data.frame(xy=TRUE) %>% 
    .[, -3] #remove n
  
  print("update_rast_xy created")
  
  # dnearneigh is in metres, finds to index of all values within the specified distance
  nb1 <- dnearneigh(as.matrix(update_rast_xy), 0, neighbour_dist)
  
  # For viewing which neighbours are selected
  # plot(nb1, coords = as.matrix(update_rast_xy))
  
  print("...Nearest neighbours found...")
  
  # Print list of distances
  # dists <- nbdists(nb1, update_rast_xy)
  
  # Generate a weights object
  weights <- nb2listw(nb1, 
                      glist = NULL,
                      style = "B",
                      zero.policy = TRUE) 
  
  print("...Weights calculated...")
  
  # Then extract just the values per cell
  var <- temporal_raster_list[[i]] %>% 
    as.data.frame() %>% 
    .[,1] 
  
  # Normalize the data so it is not affected by high values
  normalize <- function(x, na.rm = TRUE) {
    return((x- min(x)) /(max(x)-min(x)))
  }
  
  norm_var <- normalize(var)
  # hist(norm_var)
  
  
  # Moran's I value for normalized data
  I <- moran(norm_var, 
             weights, 
             zero.policy = T, 
             length(nb1), 
             Szero(weights)) %>% 
    .[[1]]
  
  print("...Moran's I calculated...")
  
  # Add output to dataframe
  MoransI <<- MoransI %>% 
    add_row(Resolution = res, MoransI = I)
  
  # Hypothesis test  ----------------------------------------------------
  
  # Run a hypothesis test via permutation
  set.seed(1)
  moran_bootstrap <- moran.mc(var,
                              weights,
                              nsim = 10000,
                              zero.policy = TRUE)
  
  print("...Bootstrapping finished...")
  
  # Print summary outputs  ----------------------------------------------
  
  # Print the output of the hypothesis test
  print(moran_bootstrap)
  
  # Plot the density of expected (under randomisation) vs. observed values of Moran's I 
  plot(moran_bootstrap, main="", las=1)
  
  # Plot the relationship between a cell's value (no. records) and its nearest neighbour values
  moran.plot(var, weights, zero.policy = T)
  
}

# Map the correlation function -------------------------------------------------------

map2(res, i, MoransI_func)

# Print the dataframe of estimates
print(MoransI)
##   Resolution   MoransI
## 1      2e+04 0.4061841
## 2      1e+05 0.3230886
## 3      2e+05 0.2679467

Session information

sessionInfo()
## R version 4.2.3 (2023-03-15 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_Australia.utf8  LC_CTYPE=English_Australia.utf8   
## [3] LC_MONETARY=English_Australia.utf8 LC_NUMERIC=C                      
## [5] LC_TIME=English_Australia.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] ggpubr_0.6.0      spdep_1.2-8       spData_2.3.0      viridis_0.6.4    
##  [5] viridisLite_0.4.2 here_1.0.1        terra_1.7-55      sf_1.0-14        
##  [9] lubridate_1.9.3   forcats_1.0.0     stringr_1.5.0     dplyr_1.1.3      
## [13] purrr_1.0.2       readr_2.1.4       tidyr_1.3.0       tibble_3.2.1     
## [17] ggplot2_3.4.4     tidyverse_2.0.0  
## 
## loaded via a namespace (and not attached):
##  [1] sass_0.4.7         jsonlite_1.8.7     splines_4.2.3      carData_3.0-5     
##  [5] bslib_0.5.1        sp_2.1-1           yaml_2.3.7         pillar_1.9.0      
##  [9] backports_1.4.1    lattice_0.22-5     glue_1.6.2         digest_0.6.33     
## [13] ggsignif_0.6.4     colorspace_2.1-0   cowplot_1.1.1      Matrix_1.6-1.1    
## [17] htmltools_0.5.6.1  pkgconfig_2.0.3    broom_1.0.5        s2_1.1.4          
## [21] scales_1.2.1       tzdb_0.4.0         timechange_0.2.0   proxy_0.4-27      
## [25] mgcv_1.9-0         farver_2.1.1       generics_0.1.3     car_3.1-2         
## [29] cachem_1.0.8       withr_2.5.1        cli_3.6.1          magrittr_2.0.3    
## [33] deldir_1.0-9       evaluate_0.22      fansi_1.0.5        nlme_3.1-163      
## [37] rstatix_0.7.2      class_7.3-22       tools_4.2.3        hms_1.1.3         
## [41] lifecycle_1.0.3    munsell_0.5.0      compiler_4.2.3     jquerylib_0.1.4   
## [45] e1071_1.7-13       rlang_1.1.1        classInt_0.4-10    units_0.8-4       
## [49] grid_4.2.3         rstudioapi_0.15.0  labeling_0.4.3     rmarkdown_2.25    
## [53] boot_1.3-28.1      wk_0.9.0           gtable_0.3.4       codetools_0.2-19  
## [57] abind_1.4-5        DBI_1.1.3          R6_2.5.1           gridExtra_2.3     
## [61] knitr_1.44         fastmap_1.1.1      utf8_1.2.4         rprojroot_2.0.3   
## [65] KernSmooth_2.23-22 stringi_1.7.12     Rcpp_1.0.11        vctrs_0.6.4       
## [69] tidyselect_1.2.0   xfun_0.40