This code calculates the geographic bias of the SCAR Antarctic Terrestrial Biodiversity Database with methods from Zizka et al., (2021). NOTE - in this code I have commented out lines that run functions for knitting the document so no results or plots are displayed.

library(tidyverse)

#require("devtools")
#install_github("azizka/sampbias")

packages <- c("sampbias", "rgdal", "terra", "sf", "sp", "raster", "viridis", "ggstance", "here")

walk(packages, require, character.only = T)

here::here()

Load the additional function code

Code for additional functions is from the sampbias package (Zizka et al., 2021).

source(here("Additional_functions/RunSampBias_function.R"))

Load data and layer setup

Read in the cleaned biodiversity database (cleaned in file “Terrestrial_Antarctic_Biodiversity_Database_Cleaning.Rmd”), the cleaned ice-free area vector (cleaned in file “ADD_ice_free_area_cleaning.Rmd”), the location of stations vector (COMNAP Antarctic Facilities), the Antarctic Conservation Biogeographic Regions (ACBRs; Terauds and Lee, 2016), and the coastline (Antarctic Digital Database coastline v. 7.5).

# Layer preparation -------------------------------------------------------

# Generate distance to accessibility feature raster
coastline <- st_read(here("Data/add_coastline_high_res_polygon_v7_5.shp"), crs = 3031)
coastline_SPVE <- vect(coastline)

stations_df <- read.csv(here("Data/COMNAP_Antarctic_Facilities_Master_2020.csv"))
stations <- st_as_sf(stations_df,
                     coords = c("Longitude..DD.", "Latitude..DD."),
                     crs = 4326) # Set as WGS 1984

stations <- st_transform(stations, 3031) # Project to WGS_1984_Stereographic_South_Pole
stations_SPVE <- vect(stations)

# Choose the resolution to work at (1km):
res <- 1000

# Create a 1km raster with all values as 1
grid_coast <- rast(ext = ext(coastline_SPVE),
                   resolution = res, 
                   vals = 1) 

crs(grid_coast) <- "epsg:3031"

# Add a one cell buffer to make sure all records are covered
grid_coast <- terra::buffer(grid_coast, 1)
terra::global(grid_coast, fun="isNA")

# Calculate the distance of each grid cell to the nearest station
dist_stationterra <- terra::distance(grid_coast, stations_SPVE)

# Save for next step
wgs84 <- CRS('+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs')

# Load biodiversity data as dataframe and save
Ant_biodf <- read.csv(here("Data/Ant_Terr_Bio_Data_Uncertainty_Terms_Removed_August_2023.csv"), header = T)

# Convert biodiversity df to sf polygon feature and save
Ant_bio <- st_as_sf(Ant_biodf, 
                    coords = c("decimalLongitude", "decimalLatitude"), 
                    crs = 4326) # Set as WGS 1984

Ant_bio <- st_transform(Ant_bio, 3031) 

# Create a raster around continent
ea_raster <- grid_coast

# Transform bias layers to raster from spatRaster 
dist_station <- raster(dist_stationterra)

# Create a list saving the bias layer
bias_layers <- list(station = dist_station)

# Load the ACBRs
ACBR_polygons <- st_read(here("Data/ACBRs_v2_2016.shp"), crs = 3031)

Optional - plot the distance raster

# Mask the distance layer to coast
# dist_station_land <- terra::mask(dist_stationterra, coastline_SPVE,
#                                  updatevalue = NA)
# 
# dist_stationdf <- as.data.frame(dist_station_land, xy=T)
# 
# ggplot() +
#   geom_sf(data = coastline, fill = "grey94", alpha = 0.5, size = 0.00001) +
#   geom_raster(data = dist_stationdf, aes(x, y, fill = lyr.1/1000)) +
#   labs(x = "Longitude", y = "Latitude", fill = "Distance to station (km)", size = 19) +
#   theme_classic()

Set up sampbias analysis for all ACBRs

We will run the sampbias function for all ACBRs separately.

# Set up the final list
output <- list()

ACBRS <- list(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)

Run the sampbias analysis for all ACBRs

The code is from Zizka et al. (2021), however it has been adapted to increase processing speed and to silence some unnecessary functions. Notes on changes are commented throughout the code.

run_bias_analysis <- function(n) {
  
  print(paste0("ACBR_number_", n))
  
  ACBR_polygons <- ACBR_polygons %>% 
    filter(ACBR_ID == n) %>% 
    sf::st_buffer(., 200) # Buffer by 200m so we don't miss any points that are nearby
  
  # Filter out bio points in ACBR
  Ant_bio_filter <- st_filter(Ant_bio, ACBR_polygons)
  
  # Set format to match sampbias requirements
  ea_occ <- data.frame(species = Ant_bio_filter$scientificNameClean, st_coordinates(Ant_bio_filter))
  
  # Now need to trim ea_raster (enviro. raster) to just the designated ACBR
  ea_raster_terra <- terra::mask(ea_raster, ACBR_polygons, 
                                 updatevalue = NA) 
  
  # Convert to raster
  ea_raster <- raster(ea_raster_terra)
  
  # Set up initial conditions
  x = ea_occ
  gaz = bias_layers
  res = 1000
  buffer = NULL
  restrict_sample = NULL
  terrestrial = FALSE # Setting to false because I don't want it to trim the data via the coastline
  inp_raster = ea_raster
  mcmc_rescale_distances = 1000 # This says to rescale distances from m to km 
  mcmc_iterations = 1e+05
  mcmc_burnin = 2e+04
  mcmc_outfile = NULL
  prior_q = c(1, 0.01)
  prior_w = c(1, 1)
  plot_raster = FALSE 
  verbose = TRUE
  run_null_model = FALSE
  use_hyperprior = FALSE # Not using a hyperprior because only one bias factor being looked at
  
  # Convert x to SpatialPoints
  dat.pts <- sp::SpatialPoints(x[, c("X", "Y")])
  
  
  if(!is.null(inp_raster)){
    dum.ras <- inp_raster
  }else{
    dum.ras <- raster::raster(round(extent(dat.pts), .DecimalPlaces(res)))
    res(dum.ras) <- res
  }
  
  # Warning if combination of resolution and extent exceed 1mio grid cells
  if (raster::ncell(dum.ras) > 1e+06) {
    warning("Huge raster size")
  }
  
  # Occurrence raster
  if (verbose) {
    message("Creating occurrence raster...")
  }
  
  # Rasterize the occurrence points to the coast - study extent
  occ.out <- raster::rasterize(dat.pts, dum.ras, fun = "count")
  
  #occ.out <-  raster::trim(occ.out) ## Muted this as I don't want to trim it
  
  # Set all values to zero
  occ.out[is.na(occ.out)] <- 0
  
  if (terrestrial) { ## Have set this to false, not needed
    if(verbose){message("Adjusting to terrestrial surface...")}
    
    # Get landmass
    if(!is.null(inp_raster)){
      wrld <- spTransform(sampbias::landmass, CRSobj = proj4string(inp_raster))
      wrld <- raster::crop(wrld, extent(occ.out))
      wrld <- raster::rasterize(wrld, occ.out)
    }else{
      wrld <- raster::crop(sampbias::landmass, extent(occ.out)) ## Landmass is a global coastlines vector, including Antarctica
      wrld <- raster::rasterize(wrld, occ.out)
    }
    # Adjust raster
    occ.out <-  raster::mask(occ.out, wrld)
    occ.out <-  raster::trim(occ.out)
  }
  
   ## Adapt occurrence raster to custom study extent if provided
  # UPDATED from the package to run faster
  # Trim by just the ice-free area of the chosen ACBR
  occ.out.terra <- rast(occ.out)
  occ.out.terra <- terra::mask(occ.out.terra, ea_raster_terra, 
                               updatevalue = NA) 
  
  # Occ.out is now just 0 for sites in the *chosen ACBR* that have no data 
  occ.out <- raster(occ.out.terra)
  
  # Removed the is.raster check because it wasn't recognising the rasters
  dis.ras <- gaz
  
  if (all(is.na(values(dis.ras[[1]])))) {
    stop("No valid distances found. Consider setting terrestrial = F")
  }
  
  # Plot the occurrence and distance rasters for diagnostics
  if(plot_raster){ ## Set this to false as it was quite slow
    # plot(stack(dis.ras))
    plot(occ.out, main = "Occurrence raster")
    if(!is.null(restrict_sample)){
      plot(restrict_sample, add = TRUE)
    }
  }
  
  ## Check if a distance was found for any gazetteer, if not, only return occurrence raster
  if (verbose) {
    message("Estimating bias...")
  }
  
  
  if (is.logical(dis.ras)) { # returns FALSE
    out <- c(Occurences = occ.out)
    class(out) <- append("sampbias", class(out))
    return(out)
  } else {
    
    # Generate the data.frame with the counts and distances
    dis.vec <- lapply(dis.ras, "getValues") # Get the distance values
    dis.vec <- as.data.frame(do.call(cbind, dis.vec))
    names(dis.vec) <- names(dis.ras)
    
    dis.vec <- data.frame(cell_id = seq_len(nrow(dis.vec)), # Match the cells with number of records + distance value
                          record_count = getValues(occ.out),
                          dis.vec)
    dis.vec <- dis.vec[complete.cases(dis.vec),] # This is where only ice-free areas from target ACBR are included
    
    # Counting the number of cells with and without records
    rec_count <- c(sum(dis.vec$record_count == 0), sum(dis.vec$record_count > 0))
    
    # Run the function
    out <- RunSampBias(x = dis.vec,
                       rescale_distances = mcmc_rescale_distances,
                       iterations = mcmc_iterations,
                       burnin = mcmc_burnin,
                       prior_q = prior_q,
                       prior_w = prior_w,
                       outfile = mcmc_outfile,
                       run_null_model = run_null_model,
                       use_hyperprior = use_hyperprior)
    
    # Create output file, a list of the class sampbias
    if (verbose) {
      message("Preparing output...")
    }
    
    # Generate the output object
    out <- list(summa = list(total_occ = nrow(x),
                             total_sp = length(unique(x$species)),
                             extent = extent(dum.ras),
                             res = res,
                             restrict_sample = restrict_sample,
                             rescale_distances = mcmc_rescale_distances,
                             data_availability = rec_count),
                occurrences = occ.out,
                bias_estimate = out,
                distance_rasters = stack(dis.ras))
    class(out) <- append("sampbias", class(out))
    
    if (verbose) {
      message(" Done\n")
    }
    
    
  }
  
  output <<- c(output, out)
  
}

# Run the sampbias function across ACBRs ----------------------------------

# map(ACBRS, run_bias_analysis)

Save the results of the analysis

Code to extract results from the output of class ‘sampbias’. Some code is adapted from Zizka et al. (2021), function plot.sampbias

ACBR_bias <- data.frame(ACBR_ID = factor(),
                        type = factor(), # Empirical or null
                        mean = numeric(), 
                        lower = numeric(),
                        upper = numeric())

# Define the saving function ----------------------------------------------

saving_func <- function(n) {

  # Set up indices to extract the four values of interest from the output list
  indS <- (4*(n-1)) + 1
  indE <- 4*n

  bias.out <- output[indS:indE]

  # Create a dataframe of posterior values
  plo1 <-  bias.out$bias_estimate %>%
    pivot_longer(cols = contains("w_"), names_to = "bias",
                 values_to = "posterior_estimate") %>%
    mutate(bias = gsub("w_", "", .data$bias)) %>%
    mutate(bias = fct_reorder(.data$bias, .data$posterior_estimate, .fun = median, .desc = FALSE))

  # Calculate the 95% Credible Interval
  CI <- quantile(plo1$posterior_estimate, c(0.025, 0.975))

  ACBR_bias <<- ACBR_bias %>% add_row(ACBR_ID = as.factor(n),
                                      type = "Empirical",
                                      mean = mean(plo1$posterior_estimate),
                                      lower = as.numeric(CI[1]),
                                      upper = as.numeric(CI[2]))

}


# Run the function saving results to a dataframe --------------------------

# map(ACBRS, saving_func)

Plotting the posterior distribution of w and the fitted bias functions per ACBR

Some code is adapted from Zizka et al. (2021), function plot.sampbias

# PLOTTING ----------------------------------------------------------------

run_bias_plots <- function(n) {

  # Set up indices to extract the four values from the output list
  indS <- (4*(n-1)) + 1
  indE <- 4*n

  bias.out <- output[indS:indE]

  ## MCMC has returned samples from the posterior, hence I take them from the bias.out object

  # Makes columns, it = iteration, likA = likelihood, priorA = prior, q is the q parameter, posterior=posterior
  plo1 <-  bias.out$bias_estimate %>%
    pivot_longer(cols = contains("w_"), names_to = "bias",
                 values_to = "posterior_estimate") %>%
    mutate(bias = gsub("w_", "", .data$bias)) %>%
    mutate(bias = fct_reorder(.data$bias, .data$posterior_estimate, .fun = median, .desc = FALSE))

  # Plots the posterior distribution of w
  p1 <- ggplot(plo1, aes(x = posterior_estimate))+
    geom_density(linewidth = 0.6, fill = "lightblue", alpha = 0.7)+
    xlab("Bias weight")+
    ylab("Density")+
    theme(axis.title.x = element_text(size = 22),
          axis.title.y = element_text(size = 22),
          axis.text.x = element_text(size=18),
          axis.text.y = element_text(size=18))+
    theme_classic()


  res_text <- paste0("ACBR ", n)

  p1 <- p1 + annotate("text", x = Inf, y = Inf, label = res_text, vjust = 1, hjust = 1) + geom_vline(aes(xintercept=mean(posterior_estimate)),                                                                                                     linetype = "dashed", size =0.72)

  print(p1)

  plo2_w <-colMeans(bias.out$bias_estimate) # Det the mean of each column
  plo2_dist <- seq(1,1000,length.out=1000) # Distances from bias 1 - 1000 km


  plo2_95 <- sapply(bias.out$bias_estimate[,4:5], function(x) quantile(x, c(0.025, 0.975)))
  plo2_w <- c(plo2_w, plo2_95[1,1], plo2_95[2,1], plo2_95[1,2], plo2_95[2,2])

  names <- c("it", "likA", "priorA", "q", "w_", "hp_rate", "q_lower", "q_upper", "w_lower", "w_upper")
  plo2_w <- setNames(plo2_w, names)

  # Plot the fitted bias function
  plo2 <- data.frame(dist = plo2_dist,
                     meanrate = plo2_w[4] * exp(-plo2_w[5]*plo2_dist),
                     lower = plo2_w[7] * exp(-plo2_w[9]*plo2_dist),
                     upper = plo2_w[8] * exp(-plo2_w[10]*plo2_dist),
                     id = names(bias.out$bias_estimate)[-c(1:4, ncol(bias.out$bias_estimate))])


  p2 <- ggplot(plo2)+
    geom_line(aes(x = .data$dist, y = .data$meanrate))+
    geom_point(aes(x = .data$dist, y = .data$meanrate), size = 0.25)+
    scale_color_viridis(discrete = TRUE)+
    xlab("Distance to bias (km)")+
    ylab("Sampling rate")+
    geom_errorbar(aes(.data$dist, ymin = lower, ymax = upper), color = "gray", alpha = 1) +
    xlim(0, 100) +
    theme_classic()+
    theme(panel.grid.minor.x = element_blank(),
          panel.grid.major.y = element_blank(),
          legend.title = element_blank(),
          legend.position = "bottom",
          axis.text = element_text(size = 18),
          axis.title = element_text(size = 20))


  p2 <- p2 + annotate("text", x = Inf, y = Inf, label = res_text, vjust = 1, hjust = 1)

  print(p2)
  
}


# Run the plotting function -----------------------------------------------

# map(ACBRS, run_bias_plots)

Running the sampbias null models

We compare the results from the fitted sampbias models with 20x replicates of a null model of random sampling across each ACBR. NOTE - This section of the code takes a long time to run.

Once again, we use modified code from Zizka et al. (2021). The only other changes are to generate the random samples.

run_null_analysis <- function(n) {
  
  # Trim the ice-free area to sample from to the ACBR
  ACBR_polygons <- ACBR_polygons %>% 
    filter(ACBR_ID == n) %>% 
    sf::st_buffer(., 200) # Buffer by 200m so we don't miss any points that are nearby
  
  # Extract the bio values just in the ACBR n, for the number of occurrences to randomly sample
  Ant_bio_filter <- st_filter(Ant_bio, ACBR_polygons)
  
  # Set format to match sampbias requirements
  ea_occ <- data.frame(species = Ant_bio_filter$scientificNameClean, st_coordinates(Ant_bio_filter))
  
  # Now need to trim ea_raster to just the designated ACBR
  ea_raster_terra <- terra::mask(ea_raster, ACBR_polygons, 
                                 updatevalue = NA) 
  # Convert to raster
  ea_raster <- raster(ea_raster_terra)
  
  # Set up empty result list
  null_result_list <- list()
  
  null_runs <- function() {
    
    # Generate the random bio points sample
    random_sample <- sf::st_sample(ACBR_polygons, # Using the sf feature 
                                   nrow(ea_occ),
                                   type = "random",
                                   exact = T) # Sample the exact number of points
    
    ea_null_occ <- data.frame(species = NA, st_coordinates(random_sample))
    
    # Set up initial conditions
    x = ea_null_occ
    gaz = bias_layers
    res = 1000
    buffer = NULL
    restrict_sample = NULL
    terrestrial = FALSE # Setting to false because I don't want it to trim the data via the coastline
    inp_raster = ea_raster
    mcmc_rescale_distances = 1000 # This says to rescale distances from m to km  
    mcmc_iterations = 1e+05
    mcmc_burnin = 2e+04
    mcmc_outfile = NULL
    prior_q = c(1, 0.01)
    prior_w = c(1, 1)
    plot_raster = FALSE 
    verbose = TRUE
    run_null_model = FALSE
    use_hyperprior = FALSE # Not using a hyperprior because only one bias factor being looked at
    
    # Convert x to SpatialPoints
    dat.pts <- sp::SpatialPoints(x[, c("X", "Y")])
    
    
    if(!is.null(inp_raster)){
      dum.ras <- inp_raster
    }else{
      dum.ras <- raster::raster(round(extent(dat.pts), .DecimalPlaces(res)))
      res(dum.ras) <- res
    }
    
    # Warning if combination of resolution and extent exceed 1mio grid cells
    if (raster::ncell(dum.ras) > 1e+06) {
      warning("Huge raster size")
    }
    
    # Occurrence raster
    if (verbose) {
      message("Creating occurrence raster...")
    }
    
    # Rasterize the occurrence points to the coast - study extent
    occ.out <- raster::rasterize(dat.pts, dum.ras, fun = "count")
    
    #occ.out <-  raster::trim(occ.out) ## Muted this as I don't want to trim it
    
    # Set all values to zero
    occ.out[is.na(occ.out)] <- 0
    
    if (terrestrial) { ## Have set this to false, not needed
      if(verbose){message("Adjusting to terrestrial surface...")}
      
      ## Get landmass
      if(!is.null(inp_raster)){
        wrld <- spTransform(sampbias::landmass, CRSobj = proj4string(inp_raster))
        wrld <- raster::crop(wrld, extent(occ.out))
        wrld <- raster::rasterize(wrld, occ.out)
      }else{
        wrld <- raster::crop(sampbias::landmass, extent(occ.out)) ## Landmass is a global coastlines vector, including Antarctica
        wrld <- raster::rasterize(wrld, occ.out)
      }
      ## Adjust raster
      occ.out <-  raster::mask(occ.out, wrld)
      occ.out <-  raster::trim(occ.out)
    }
    
  
    ## Adapt occurrence raster to custom study extent if provided
  # UPDATED from the package to run faster
  # Trim by just the ice-free area of the chosen ACBR
    occ.out.terra <- rast(occ.out)
    occ.out.terra <- terra::mask(occ.out.terra, ea_raster_terra, 
                                 updatevalue = NA) 
    
    # Occ.out is now just 0 for sites in the *chosen ACBR* that have no data 
    occ.out <- raster(occ.out.terra)
   
    # Removed the is.raster check because it wasn't recognising the rasters
    dis.ras <- gaz
    
    if (all(is.na(values(dis.ras[[1]])))) {
      stop("No valid distances found. Consider setting terrestrial = F")
    }
    
    # Plot the occurrence and distance rasters for diagnostics
    if(plot_raster){ #SET THIS TO FALSE, AS QUITE SLOW
      # plot(stack(dis.ras))
      plot(occ.out, main = "Occurrence raster")
      if(!is.null(restrict_sample)){
        plot(restrict_sample, add = TRUE)
      }
    }
    
    ## Check if a distance was found for any gazetteer, if not, only return occurrence raster
    
    if (verbose) {
      message("Estimating bias...")
    }
    
    
    if (is.logical(dis.ras)) { #returns FALSE
      out <- c(Occurences = occ.out)
      class(out) <- append("sampbias", class(out))
      return(out)
    } else {
      
      # Generate the data.frame with the counts and distances
      dis.vec <- lapply(dis.ras, "getValues") #get the distance values
      dis.vec <- as.data.frame(do.call(cbind, dis.vec))
      names(dis.vec) <- names(dis.ras)
      
      dis.vec <- data.frame(cell_id = seq_len(nrow(dis.vec)), # Match the cells with number of records + distance value
                            record_count = getValues(occ.out),
                            dis.vec)
      dis.vec <- dis.vec[complete.cases(dis.vec),] # This is where only ice-free areas from target ACBR are included
      
      # Counting the number of cells with and without records
      rec_count <- c(sum(dis.vec$record_count == 0), sum(dis.vec$record_count > 0))
      
      # Run the function
      out <- RunSampBias(x = dis.vec,
                         rescale_distances = mcmc_rescale_distances,
                         iterations = mcmc_iterations,
                         burnin = mcmc_burnin,
                         prior_q = prior_q,
                         prior_w = prior_w,
                         outfile = mcmc_outfile,
                         run_null_model = run_null_model,
                         use_hyperprior = use_hyperprior)
      
      # Create output file, a list of the class sampbias
      if (verbose) {
        message("Preparing output...")
      }
      
      # Generate the output object
      out <- list(summa = list(total_occ = nrow(x),
                               total_sp = length(unique(x$species)),
                               extent = extent(dum.ras),
                               res = res,
                               restrict_sample = restrict_sample,
                               rescale_distances = mcmc_rescale_distances,
                               data_availability = rec_count),
                  occurrences = occ.out,
                  bias_estimate = out,
                  distance_rasters = stack(dis.ras))
      class(out) <- append("sampbias", class(out))
      
      if (verbose) {
        message(" Done\n")
      }
      
      
    }
    
    
    # Save the result
    null_result_list <<- c(null_result_list, out)
    
    bias.out <- out
    
   ## MCMC has returned samples from the posterior, hence I take them from the bias.out object
  
   # Makes columns, it = iteration, likA = likelihood, priorA = prior, q is the q parameter, posterior=posterior
    plo1 <-  bias.out$bias_estimate %>%
      pivot_longer(cols = contains("w_"), names_to = "bias",
                   values_to = "posterior_estimate") %>%
      mutate(bias = gsub("w_", "", .data$bias)) %>%
      mutate(bias = fct_reorder(.data$bias, .data$posterior_estimate, .fun = median, .desc = FALSE))
    
    
    # Extract the 95% Credible Interval
    CI <- quantile(plo1$posterior_estimate, c(0.025, 0.975))
    
    ACBR_bias <<- ACBR_bias %>% add_row(ACBR_ID = as.factor(n),
                                        type = "Simulation",
                                        mean = mean(plo1$posterior_estimate),
                                        lower = as.numeric(CI[1]),
                                        upper = as.numeric(CI[2]))
    
    
    # Plots the posterior distribution of w
    p1 <- ggplot(plo1, aes(x = posterior_estimate))+
      geom_density(linewidth = 0.6, fill = "lightblue", alpha = 0.7)+
      xlab("Bias weight")+
      ylab("Density")+
      theme(axis.title.x = element_text(size = 22),
            axis.title.y = element_text(size = 22),
            axis.text.x = element_text(size=18),
            axis.text.y = element_text(size=18))+
      theme_classic()
    
    
    res_text <- paste0("NULL ACBR ", n)
    
    p1 <- p1 + annotate("text", x = Inf, y = Inf, label = res_text, vjust = 1, hjust = 1) + geom_vline(aes(xintercept=mean(posterior_estimate)),                                                                                                       linetype = "dashed", size =0.72)
    
    print(p1)
    
    plo2_w <-colMeans(bias.out$bias_estimate) # Get the mean of each column
    plo2_dist <- seq(1,1000,length.out=1000) # Distances from bias 1 - 1000 km
    
    
    plo2_95 <- sapply(bias.out$bias_estimate[,4:5], function(x) quantile(x, c(0.025, 0.975)))
    plo2_w <- c(plo2_w, plo2_95[1,1], plo2_95[2,1], plo2_95[1,2], plo2_95[2,2])
    
    names <- c("it", "likA", "priorA", "q", "w_", "hp_rate", "q_lower", "q_upper", "w_lower", "w_upper")
    plo2_w <- setNames(plo2_w, names)
    
    plo2 <- data.frame(dist = plo2_dist,
                       meanrate = plo2_w[4] * exp(-plo2_w[5]*plo2_dist),
                       lower = plo2_w[7] * exp(-plo2_w[9]*plo2_dist),
                       upper = plo2_w[8] * exp(-plo2_w[10]*plo2_dist),
                       id = names(bias.out$bias_estimate)[-c(1:4, ncol(bias.out$bias_estimate))]) 
    
    # Plot the fitted bias function
    ggplot(plo2)+
      geom_line(aes(x = .data$dist, y = .data$meanrate))+
      geom_point(aes(x = .data$dist, y = .data$meanrate), size = 0.25)+
      scale_color_viridis(discrete = TRUE)+
      xlab("Null - Distance to bias (km)")+
      ylab("Sampling rate")+
      geom_errorbar(aes(.data$dist, ymin = lower, ymax = upper), color = "gray", alpha = 1) +
      xlim(0, 150) +
      theme_classic()+
      theme(panel.grid.minor.x = element_blank(),
            panel.grid.major.y = element_blank(),
            legend.title = element_blank(),
            legend.position = "bottom",
            axis.text = element_text(size = 18),
            axis.title = element_text(size = 20))
    
    
     
  }
  
  #Run the function over 20 replicates
  map(seq_len(20), ~null_runs())
  
}

# RUN THE NULL MODEL ------------------------------------------------------

# map(ACBRS, run_null_analysis)

Plotting null and empirical

# ACBR_bias_plot <- ggplot(data = ACBR_bias, aes(y = ACBR_ID, x = mean)) +
#   geom_point(data = subset(ACBR_bias, type %in% "Simulation"), aes(color = type), size = 2, position = position_dodgev(height = 0.01)) +
#   geom_errorbarh(data = subset(ACBR_bias, type %in% "Simulation"), aes(xmin = lower, xmax = upper, color = type), width = 0.1, size = 0.4, alpha = 1) +
#   geom_point(data = subset(ACBR_bias, type %in% "Empirical"), aes(color = type), size = 2, position = position_dodgev(height = 0.01)) +
#   geom_errorbarh(data = subset(ACBR_bias, type %in% "Empirical"), aes(xmin = lower, xmax = upper, color = type), width = 0.1, size = 0.4, alpha = 1) +
#   theme_classic() +
#   labs(x = expression(paste("Posterior ", italic("w"))), y = "Antarctic Conservation Bioregion")  + # define axis label
#   theme(legend.position = c(.82, .9)) +
#   scale_y_continuous(breaks = seq(1,16,1)) +
#   scale_color_manual(name = "",
#                      labels = c("Fitted", "Null"),
#                      values = c("#0d8aadff", "#75044fff"))

# write.csv(ACBR_bias, here("Outputs/ACBR_sampbias_output_AUG_23.csv"))

# ggsave(ACBR_bias_plot, filename = here("Outputs/ACBR_bias_plot_AUG_23.png"), w = 17, h = 12, units = "cm", dpi = 600, device = "png")

References

Terauds, A., Lee, J.R., 2016. Antarctic biogeography revisited: updating the Antarctic Conservation Biogeographic Regions. Diversity and Distributions 22, 836–840. https://doi.org/10.1111/ddi.12453

Zizka, A., Antonelli, A., Silvestro, D., 2021. sampbias, a method for quantifying geographic sampling biases in species distribution data. Ecography 44, 25–32. https://doi.org/10.1111/ecog.05102

Session information

sessionInfo()
## R version 4.2.3 (2023-03-15 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_Australia.utf8  LC_CTYPE=English_Australia.utf8   
## [3] LC_MONETARY=English_Australia.utf8 LC_NUMERIC=C                      
## [5] LC_TIME=English_Australia.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] here_1.0.1        ggstance_0.3.6    viridis_0.6.2     viridisLite_0.4.1
##  [5] raster_3.6-20     sf_1.0-12         terra_1.7-18      rgdal_1.6-5      
##  [9] sampbias_1.0.4    rgeos_0.6-2       sp_1.6-0          lubridate_1.9.2  
## [13] forcats_1.0.0     stringr_1.5.0     dplyr_1.1.1       purrr_1.0.1      
## [17] readr_2.1.4       tidyr_1.3.0       tibble_3.2.1      ggplot2_3.4.2    
## [21] tidyverse_2.0.0  
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.10        lattice_0.20-45    class_7.3-21       rprojroot_2.0.3   
##  [5] digest_0.6.31      utf8_1.2.3         R6_2.5.1           evaluate_0.20     
##  [9] e1071_1.7-13       pillar_1.9.0       rlang_1.1.0        rstudioapi_0.14   
## [13] jquerylib_0.1.4    rmarkdown_2.21     munsell_0.5.0      proxy_0.4-27      
## [17] compiler_4.2.3     xfun_0.38          pkgconfig_2.0.3    htmltools_0.5.5   
## [21] tidyselect_1.2.0   gridExtra_2.3      codetools_0.2-19   fansi_1.0.4       
## [25] tzdb_0.3.0         withr_2.5.0        grid_4.2.3         jsonlite_1.8.4    
## [29] gtable_0.3.3       lifecycle_1.0.3    DBI_1.1.3          magrittr_2.0.3    
## [33] units_0.8-1        scales_1.2.1       KernSmooth_2.23-20 cli_3.6.1         
## [37] stringi_1.7.12     cachem_1.0.7       bslib_0.4.2        generics_0.1.3    
## [41] vctrs_0.6.1        cowplot_1.1.1      tools_4.2.3        glue_1.6.2        
## [45] hms_1.1.3          fastmap_1.1.1      yaml_2.3.7         timechange_0.2.0  
## [49] colorspace_2.1-0   classInt_0.4-9     knitr_1.42         sass_0.4.5