This is supporting information for the Methods in Ecology and Evolution article entitled, Integrated SDM database: enhancing the relevance and utility of species distribution models in conservation management, by Veronica F. Frans*, Amélie A. Augé, Jim Fyfe, Yuqian Zhang, Nathan McNally, Hendrik Edelhoff, Niko Balkenhol, & Jan O. Engler. Please contact the corresponding author (*) for any inquiries.

1. Methods summary

Here, we exemplify how to use the final integrated SDM database from Appendix S6 to query sites of interest and extract or summarise data fields across sites. We also show how to visualise these queries. In particular, we focus on sites near current New Zealand sea lion (Phocarctos hookeri; NZSL) pupping locations.

This script is intended for use by the database’s end-users to gather further information on specific locations, or to assist in site prioritisations.

The final products from this script are as follows:

  1. Data tables (CSV format) summarising sites (mainland in general and sites near pupping locations).

  2. A map of the sites located near pupping locations and their proximity to human impact areas.

2. R Setup

The script presented here was written in R (version 4.0.5; R Core Team 2021) and uses the packages listed below.
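
For reproducibility, the exact package versions in use at run time can also be recorded (an optional addition on our part):

# optionally record R and package versions used
  sessionInfo()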

2.1 Libraries

# libraries
  library("raster")       # raster data
  library("rgdal")        # input/output; projections; reading ASCII files
  library("tidyverse")    # plots and working with complex tables
  library("RColorBrewer") # colours for graphics
  library("plyr")         # sort data
  library("reshape")      # sort data
  library("reshape2")     # sort data
  library("rgeos")        # polygon and point conversions
  library('tmap')         # visualizing maps
  library("cowplot")      # multiplots
  library("grid")         # multiplots
  library("png")          # display exported images in Rmarkdown

2.2 Directories

We create and use multiple folders to organise our inputs and outputs. Similar directories can be made anywhere by only changing the root directory object, dir.

# Root directory
  dir <- c('G:\\R\\NZSL_MSSDM_MCA_2019_NZ\\')

# Data directory
  dat.dir <- paste0(dir,'data')

# Intermediate data and layers
  dir.create(paste0(dir,'data\\intermediate'),recursive=TRUE)
  int.dir <- paste0(dir,'data\\intermediate')  # no '\\' here for read/writeOGR

# Intermediate layers that have been clipped
  dir.create(paste0(int.dir,'\\clipped'),recursive=TRUE)
  clp.dir <- paste0(int.dir,'\\clipped')  # no '\\' here for read/writeOGR
  
# Final layers
  dir.create(paste0(dir,'layers'),recursive=TRUE)
  lay.dir <- paste0(dir,'layers')  # no '\\' here for read/writeOGR
  
# Final tables
  dir.create(paste0(dir,'tables'),recursive=TRUE)
  tab.dir <- paste0(dir,'tables\\')

# Summary tables
  dir.create(paste0(dir,'tables\\summaries'),recursive=TRUE)
  sum.dir <- paste0(dir,'tables\\summaries\\')
  
# Final figures
  dir.create(paste0(dir,'figures'),recursive=TRUE)
  fig.dir <- paste0(dir,'figures\\')

2.3 Colours

Colourblind-friendly colours for plotting.

# colours for state values (0-111)
  state_cols <-c('#BBBBBB','#DC050C','#882E72','#1965B0','#6195CF','#4EB265',
                 '#90C987','#F7CB45','#EE8026')

# colours for thresholds or binary plots
  thresh_cols <- c('#DDCC77','#CC3311')
  
# colours
  cat_cols <- c("#a65628","#f781bf")
  cont_cols <- c("#ff7f00","#ffff33","#377eb8","#e41a1c","#4daf4a")
  slope_cols <- c("#984ea3")
  var_cols <- c("#ff7f00", # cliff
                "#ffff33", # coast
                "#377eb8", # forest
                "#e41a1c", # grass
                "#4daf4a", # land cover
                "#984ea3", # sand
                "#a65628", # slope
                "#f781bf") # water
  
# function to explore colours for tmap:
  #tmaptools::palette_explorer()
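
To quickly preview any of these palettes before mapping, a simple base-R sketch like the following can be used (our own addition, for illustration only):

# preview a palette (e.g. the state colours)
  barplot(rep(1, length(state_cols)), col = state_cols, border = NA,
          axes = FALSE, names.arg = seq_along(state_cols))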

2.4 Functions

2.4.1 Calculate mode

calc.mode()

Calculate the mode of a categorical feature.

# calculate the mode of a categorical feature
  calc.mode <- function(x) {
     uniq_vals <- unique(x)
     uniq_vals[which.max(tabulate(match(x, uniq_vals)))]
  }
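
For example, applied to a small categorical vector (toy values for illustration):

# example use of calc.mode() on a toy vector
  calc.mode(c('sand','grass','sand','coast','sand'))
## [1] "sand"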

2.4.2 Get variable names

get.name()

Convert the name of an input object into a string, for use when naming saved files.

# turn variable name into a string (for use in file naming)
  get.name <- function(x){
             y <- deparse(substitute(x))
             return(y)
  }
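
For example (toy object for illustration):

# example use of get.name()
  toy_layer <- 1:5
  get.name(toy_layer)
## [1] "toy_layer"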

2.4.3 Load spatial data and ensure projection

get.shp()

Load points or polygons and project them. The default projection is NZGD2000 / New Zealand Transverse Mercator 2000 (EPSG:2193).

# load point/polygon and project
  get.shp <- function(p_name,
                      # defaults
                      polydir=dat.dir, prj=c("+init=epsg:2193")){
    # read shapefile
      pol <- readOGR(polydir, paste0(p_name))
     
    # project and return point/polygon
      pol <- spTransform(pol, CRS(paste0(prj)))
      return(pol)
  }

get.ras()

Load rasters and project them. The default projection is NZGD2000 / New Zealand Transverse Mercator 2000 (EPSG:2193).

# load raster and project
  get.ras <- function(r_name,
                      # defaults
                      ras_dir=dat.dir, prj=c("+init=epsg:2193")){
      # read raster
        r <- raster(paste0(ras_dir,"\\",r_name))
      
      # project and return raster
        crs(r) <- CRS(paste0(prj))
        return(r)
  }
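
get.ras() is used in the same way as get.shp(); a hypothetical call (placeholder file name) would look like this:

# example use of get.ras() (hypothetical file name)
  #r <- get.ras('example_raster.tif', ras_dir = int.dir)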

3. Load data

Load the suitable site polygons (minimum 35 females) created in Appendix S6, saved under the name NZSL_integrated_SDM_database.shp.

# load and ensure projections
  sites <- get.shp('NZSL_integrated_SDM_database', polydir = lay.dir)
# show column names
  colnames(sites@data)
##  [1] "site_ID"    "main_isld"  "region"     "DOC_region" "area"      
##  [6] "DOC_code"   "DOC_name"   "DOC_size"   "DOC_pc"     "S1_area_pc"
## [11] "S2_area_pc" "S3_area_pc" "S1_limits"  "S2_limits"  "S3_limits" 
## [16] "S1_uncrt"   "S2_uncrt"   "S3_uncrt"   "MESS_class" "dissim_var"
## [21] "hum_im_pc"  "rd_sl_mi"   "rd_unsl_mi" "fences"     "graze_pc"  
## [26] "in_watr_mi" "in_watr_me" "in_watr_mx" "pine_pc"    "curr_NZSL" 
## [31] "histr_NZSL" "X"          "Y"          "id"

Base map items for visualisation.

# NZ polygons
  NZ_polygon <- get.shp('NZ_polygon_dissolved')

# NZ inland water bodies
  NZ_water <- get.shp('water_polygons_all_NZ')

Current NZSL pupping location data.

# get shapefiles
  NZSL_current <- get.shp('current_NZSL_locations_9-20-2019',polydir = int.dir)

# keep name columns only
  NZSL_current <- (NZSL_current)[5]
  
# add column for location type
  NZSL_current$loc_type <- '1994-2019 (Dec - Mar)'

Human impact polygons from the multi-criteria decision analysis (MCDA).

# load human impact areas again
  human_impact_areas <- get.shp('human_impact_areas',polydir = int.dir)

# add a label column (used for the map fill and legend)
  human_impact_areas$hum_imp <- 'human impact areas'

Planted pine forest polygons.

# planted pine forests
  pine <- get.shp('planted_pine')

# overwrite column values with a single label (for plotting)
  pine$SUBID_2016 <- 'planted pine forest'

4. Summarising all sites in the study area

First, we show how to summarise all sites, sorted by DOC (Department of Conservation) management regions and North versus South Island.

4.1 How many suitable sites per DOC region and what are their average sizes?

# summaries per region
  reg <- ddply(sites@data,.(DOC_region),
               summarize,
               count=length(site_ID), #total number of sites per region
               ttl_area=sum(area),    #total area across sites (km^2)
               DOC_perc=(sum(DOC_size, na.rm=TRUE)/sum(area))*100, #% area under DOC
               mean_size=mean(area),  #mean size across region sites
               sd_size=sd(area),      #sd size across region sites
               min_size=min(area),    #minimum size across region sites
               max_size=max(area)     #maximum size across region sites
               ) %>% 
               mutate_if(is.numeric, round, digits=2) #round the outputs
# show here
   knitr::kable(reg)
| DOC_region               | count | ttl_area | DOC_perc | mean_size | sd_size | min_size | max_size |
|--------------------------|-------|----------|----------|-----------|---------|----------|----------|
| Central North Island     | 40    | 68.55    | 6.19     | 1.71      | 1.14    | 0.43     | 4.92     |
| Eastern South Island     | 33    | 49.46    | 22.08    | 1.50      | 2.15    | 0.36     | 11.16    |
| Hauraki-Waikato-Taranaki | 45    | 63.87    | 7.58     | 1.42      | 1.22    | 0.36     | 6.94     |
| Lower North Island       | 42    | 81.95    | 7.16     | 1.95      | 2.23    | 0.36     | 9.68     |
| Northern North Island    | 100   | 275.62   | 14.44    | 2.76      | 5.98    | 0.35     | 49.64    |
| Northern South Island    | 48    | 86.09    | 10.12    | 1.79      | 4.35    | 0.38     | 30.14    |
| Southern South Island    | 36    | 74.95    | 28.22    | 2.08      | 2.87    | 0.37     | 14.39    |
| Western South Island     | 51    | 97.70    | 35.50    | 1.92      | 1.86    | 0.43     | 8.12     |

4.2 How many suitable sites per main island?

# summaries per island
  isl <- ddply(sites@data,.(main_isld),
               summarize,
               count=length(site_ID), #total number of sites per island
               ttl_area=sum(area),    #total area across sites (km^2)
               DOC_perc=(sum(DOC_size, na.rm=TRUE)/sum(area))*100, #% area under DOC
               mean_size=mean(area),  #mean size across island sites
               sd_size=sd(area),      #sd size across island sites
               min_size=min(area),    #minimum size across island sites
               max_size=max(area)     #maximum size across island sites
               ) %>% 
               mutate_if(is.numeric, round, digits=2) #round the outputs
# show here
   knitr::kable(isl)
| main_isld    | count | ttl_area | DOC_perc | mean_size | sd_size | min_size | max_size |
|--------------|-------|----------|----------|-----------|---------|----------|----------|
| North Island | 227   | 489.99   | 11.17    | 2.16      | 4.17    | 0.35     | 49.64    |
| South Island | 168   | 308.20   | 24.48    | 1.83      | 3.00    | 0.36     | 30.14    |

4.3 How certain are we of the site predictions?

Here, we can summarise the range of uncertainty for each state prediction.

# summary of prediction uncertainty (CV) across all sites
  cv.sum <- ddply(sites@data,.(),
                  summarize,
                  mean_S1=mean(S1_uncrt), #mean CV across all sites
                  mean_S2=mean(S2_uncrt), #mean CV across all sites
                  mean_S3=mean(S3_uncrt), #mean CV across all sites
                  sd_S1=sd(S1_uncrt),     #sd of CV across all sites
                  sd_S2=sd(S2_uncrt),     #sd of CV across all sites
                  sd_S3=sd(S3_uncrt),     #sd of CV across all sites
                  min_S1=min(S1_uncrt),   #minimum CV across all sites
                  min_S2=min(S2_uncrt),   #minimum CV across all sites
                  min_S3=min(S3_uncrt),   #minimum CV across all sites
                  max_S1=max(S1_uncrt),   #maximum CV across all sites
                  max_S2=max(S2_uncrt),   #maximum CV across all sites
                  max_S3=max(S3_uncrt)    #maximum CV across all sites
                  ) %>% 
                   mutate_if(is.numeric, round, digits=2) #round the outputs
# transform
  cv.sum <- melt(cv.sum)
  cv.sum <- cv.sum[,-1]
  colnames(cv.sum)[2] <- 'CV_percent'
  
# show here
  knitr::kable(cv.sum)
| variable | CV_percent |
|----------|------------|
| mean_S1  | 1.12       |
| mean_S2  | 0.46       |
| mean_S3  | 0.59       |
| sd_S1    | 0.38       |
| sd_S2    | 0.12       |
| sd_S3    | 0.28       |
| min_S1   | 0.49       |
| min_S2   | 0.13       |
| min_S3   | 0.20       |
| max_S1   | 2.20       |
| max_S2   | 0.79       |
| max_S3   | 1.59       |

4.4 How many sites per region are within 10km of current/historic pupping locations?

# summaries per region
  cur_hist <- ddply(sites@data,.(DOC_region),
                   summarize,
                   ttl_curr=sum(!is.na(curr_NZSL)),  #no. sites near current locations
                   ttl_hist=sum(!is.na(histr_NZSL)), #no. sites near historic locations
                   perc_curr=sum(!is.na(curr_NZSL))/
                             length(site_ID), #proportion of sites near current pupping locations
                   perc_hist=sum(!is.na(histr_NZSL))/
                             length(site_ID) #proportion of sites near historic pupping locations
                   ) %>% 
                   mutate_if(is.numeric, round, digits=2) #round the outputs

# show here
   knitr::kable(cur_hist)
| DOC_region               | ttl_curr | ttl_hist | perc_curr | perc_hist |
|--------------------------|----------|----------|-----------|-----------|
| Central North Island     | 0        | 0        | 0.00      | 0.00      |
| Eastern South Island     | 9        | 0        | 0.27      | 0.00      |
| Hauraki-Waikato-Taranaki | 0        | 0        | 0.00      | 0.00      |
| Lower North Island       | 0        | 1        | 0.00      | 0.02      |
| Northern North Island    | 0        | 6        | 0.00      | 0.06      |
| Northern South Island    | 0        | 4        | 0.00      | 0.08      |
| Southern South Island    | 10       | 0        | 0.28      | 0.00      |
| Western South Island     | 0        | 0        | 0.00      | 0.00      |

5. Extract sites within 10km of current NZSL pupping sightings and summarise

Only select sites where the column curr_NZSL has data (not NA).

# subset the sites within current pupping location ranges
  sites_curr_loc <- sites[!is.na(sites@data$curr_NZSL),]

From this, we can then make the following short queries:

# how many sites?
  length(sites_curr_loc)
## [1] 19
# list names of sites
  select(sites_curr_loc@data, DOC_region, site_ID)

5.1 How many sites are near each pupping location?

Since the curr_NZSL column is a semi-colon-separated (;) list of multiple pupping site location names, we first separate them into duplicate rows. Then we summarise by pupping location.
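
As a minimal illustration of this splitting step (toy data for illustration only; the actual call follows below):

# toy example: separate_rows() gives one row per pupping location
  toy <- data.frame(site_ID   = c('A-1','A-2'),
                    curr_NZSL = c('Sandfly Bay; Allans Beach', 'Warrington'))
  separate_rows(toy, curr_NZSL, sep = "; ")  # 'A-1' now appears twice, once per location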

# split into multiple rows
  curr_long <- separate_rows(sites_curr_loc@data, curr_NZSL, sep = "; ",
                            convert = TRUE)

# convert to factor
  curr_long$curr_NZSL <- as.factor(curr_long$curr_NZSL)
  
# summaries of pupping locations
  pup <- ddply(curr_long,.(curr_NZSL),
               summarize,
               ttl_curr=length(curr_NZSL)  #number sites within curr.
               )
# show here
  knitr::kable(pup)
| curr_NZSL               | ttl_curr |
|-------------------------|----------|
| Akatore Creek           | 3        |
| Allans Beach            | 3        |
| Boulder Beach           | 2        |
| Cabbage Point           | 1        |
| Catlins Lake            | 1        |
| Hoopers Inlet           | 3        |
| Kaka Point              | 4        |
| North Arm               | 1        |
| Nugget Point            | 2        |
| Porpoise Bay            | 2        |
| Port Pegasus/Pikihatiti | 1        |
| Purakaunui Bay          | 1        |
| Purehurehu Point        | 3        |
| Sandfly Bay             | 4        |
| Smaills Beach           | 1        |
| South Arm               | 1        |
| Surat Bay               | 1        |
| Tahakopa Bay            | 3        |
| Victory Beach           | 2        |
| Warrington              | 4        |

This table can also be viewed as a bar chart.

# colours
  colourCount = length(unique(pup$curr_NZSL))
  getPalette = colorRampPalette(brewer.pal(9, "Spectral"))
  
# plot site frequencies per pupping location
  freq_sites <- ggplot(pup) + 
                aes(x=curr_NZSL, y=ttl_curr, fill=factor(curr_NZSL)) + 
                geom_bar(stat='identity') +
                scale_fill_manual(values = getPalette(colourCount)) +
                ylab('count') + xlab('current pupping locations') +
                ggtitle('Number of sites within 10km of known pupping locations') +
                theme(axis.text.x=element_text(angle = 45, vjust = .9, hjust = .9),
                      legend.position = 'none')

# display here
  freq_sites
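
If needed, this bar chart can also be saved to the figures folder (a sketch using ggsave(); the file name is our own choice):

# save bar chart (optional)
  ggsave(paste0(fig.dir,'sites_per_pupping_location.png'), plot = freq_sites,
         height = 4, width = 6, units = 'in', dpi = 600)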

5.2 Quick map

Here is a quick view of the sites.

# plot sites
  plot(sites_curr_loc)

A better way to plot these sites is shown in the next section.

6. Mapping the sites

6.1 Base map

Get the extent of the sites near current NZSL pupping locations; this extent will be used to frame the map.

# get extent
  extent(sites_curr_loc)
## class      : Extent 
## xmin       : 1190875 
## xmax       : 1424325 
## ymin       : 4758650 
## ymax       : 4946625

Create a background polygon covering this area. We take the above extent (widened slightly to the east and north to give the map some margin) and convert it directly into a SpatialPolygons object.

# create background polygon
  bkg <- as(raster::extent(c(1190875,1454325,4758650,4956625)), "SpatialPolygons")
  proj4string(bkg) <- paste0("+init=epsg:2193 +proj=tmerc +lat_0=0 +lon_0=173 +k=0.9996",
                             " +x_0=1600000 +y_0=10000000 +ellps=GRS80 ",
                             "+towgs84=0,0,0,0,0,0,0 +units=m +no_defs")
  bkg <- spTransform(bkg, CRS("+init=epsg:2193"))

Make a base map with the background polygon as the extent, and the NZ and inland water polygons as the overlain layers.

# set mode
  tmap_mode("plot")

# base map
  # background polygon
    bkg_map <- tm_shape(bkg) +
               tm_borders('white', alpha = 0) +
               tm_layout(frame=FALSE, bg.color = NA)
  
  # NZ polygon
    NZ_map <- tm_shape(NZ_polygon) + 
              tm_polygons(col='#DDDDDD', border.alpha = 0) +
              tm_legend(show=FALSE)
    
  # inland water bodies
    wtr_map <- tm_shape(NZ_water) +
               tm_fill(col='#33BBEE',
                       palette = '#33BBEE', alpha = 1) +
               tm_legend(show=FALSE)
    
  # combine
    base_map <- bkg_map + NZ_map + wtr_map

We can view the base map here.

# display here
  base_map

6.2 Human impact polygons

Make a map layer of human impact area polygons.

# human impact areas
  hum_map <- tm_shape(human_impact_areas) +
             tm_fill(col='hum_imp',
                     palette='#EE7733',alpha = 1,
                     title = '', legend.show = FALSE)

We can view it here, overlaid on the base map.

# display here
  base_map + hum_map

6.3 Sites and current pupping locations

Make a map layer of the suitable-site polygons.

# make suitable sites polygon layers
  suit_map <- tm_shape(sites_curr_loc) +
              tm_polygons(border.col='#000000', lwd=1, alpha = 0, border.alpha = 1,
                          title = 'suitable sites', legend.show = FALSE)

Make a map layer of current pupping locations.

# points for current sighting locations
  nzsl_pts <- tm_shape(NZSL_current) +
              tm_dots(col = 'loc_type', size = 0.08,
                      palette=c('#882E72'), shape = 17,
                      title = 'known female/pup locations', legend.show = FALSE)

6.4 Legend

Create a legend for all of the map objects.

# legend only
  leg <- bkg_map +
        # suitable sites
          tm_add_legend(type = c("symbol"),
                  labels = 'Suitable sites',
                  col = '#000000',
                  size = .5, shape = 0,
                  title = '') +
        # NZSL sightings
          tm_add_legend(type = c("symbol"),
                  labels = 'Current pupping sites',
                  col = '#882E72',#'#33BBEE',
                  size = .3, shape = 17,#19,
                  title = '') +
        # human impacts
          tm_add_legend(type = c("symbol"),
                labels = 'Human impact areas',
                col = '#EE7733',#'#EE6677',
                alpha = 1,
                size = .5, shape = 15,
                title = '') +
        # inland water bodies
          tm_add_legend(type = c("symbol"),
                labels = 'Inland water bodies',
                col = '#33BBEE',
                alpha = 1,
                size = .5, shape = 15,
                title = '') +
        # layout
          tm_layout(legend.show = TRUE,
                   legend.position = c('right','bottom'),
                   frame=FALSE,
                   bg.color = NA)

6.5 Combined map

Combine all map layers and legend into one map.

site_map <- # maps
            base_map + hum_map + nzsl_pts + suit_map + 
            # legend
            leg +
            # scalebar
            tm_scale_bar(breaks = c(0,10,20), size = 0.5,
                           position = c(.4,0))

Save map.

# save image ("plot" mode)
  tmap_save(site_map, filename = paste0(fig.dir,"current_sites_zoom.png"),
            height = 4, width=5, units = 'in', dpi=600)

Display here.

knitr::include_graphics(paste0(fig.dir,"current_sites_zoom.png"))

7. Subsetted data table of the sites

Make a new data frame from selected columns of the database and export it as a CSV table (these columns were used for Table 2 in the corresponding manuscript).

# dataframe select by column
  curr_df <- select(sites_curr_loc@data,
                    c(site_ID, # site name information
                      area,    # site size information
                      DOC_pc,  # existing management information
                      hum_im_pc, graze_pc, fences,     # human impacts
                      S1_limits, S2_limits, S3_limits, # restoration features
                      MESS_class, dissim_var,          # model uncertainty
                      S1_uncrt, S2_uncrt, S3_uncrt
                      ))

# change NAs to 0
  curr_df$DOC_pc[is.na(curr_df$DOC_pc)] <- 0
  curr_df$graze_pc[is.na(curr_df$graze_pc)] <- 0

Recode some of the values to compress the table.

# Change names
  curr_df$fences <- mapvalues(curr_df$fences, 
                              from=c('absent','present'),
                              to=c('N','Y'))
# Remove words
  curr_df <- data.frame(lapply(curr_df,
                               function(x) {gsub(" distance","",x,ignore.case = TRUE)}))
  curr_df <- data.frame(lapply(curr_df,
                               function(x) {gsub("inland ","",x,ignore.case = TRUE)}))
  curr_df <- data.frame(lapply(curr_df,
                               function(x) {gsub("land cover","LC",x,ignore.case = TRUE)}))
# inspect final table
   knitr::kable(curr_df)
| site_ID | area | DOC_pc | hum_im_pc | graze_pc | fences | S1_limits | S2_limits | S3_limits | MESS_class   | dissim_var | S1_uncrt | S2_uncrt | S3_uncrt |
|---------|------|--------|-----------|----------|--------|-----------|-----------|-----------|--------------|------------|----------|----------|----------|
| ESI-21  | 1.76 | 3.43   | 33.82     | 10.69    | N      | sand      | grass     | coast     | low          | slope      | 1.27     | 0.36     | 0.26     |
| ESI-22  | 0.71 | 0.00   | 0.00      | 35.16    | N      | sand      | sand      | coast     | low          | LC         | 1.28     | 0.28     | 0.38     |
| ESI-23  | 0.45 | 0.00   | 0.00      | 0.07     | N      | grass     | grass     | grass     | none         | slope      | 0.75     | 0.57     | 0.74     |
| ESI-24  | 0.79 | 0.00   | 100.00    | 0.55     | Y      | grass     | water     | water     | low          | water      | 0.63     | 0.58     | 0.82     |
| ESI-25  | 2.11 | 0.00   | 0.00      | 1.78     | Y      | sand      | sand      | grass     | low          | slope      | 1.40     | 0.43     | 0.36     |
| ESI-26  | 0.37 | 5.83   | 0.00      | 16.12    | N      | grass     | grass     | grass     | low          | sand       | 0.96     | 0.40     | 0.59     |
| ESI-31  | 0.48 | 0.84   | 0.00      | 9.99     | Y      | sand      | grass     | grass     | low          | sand       | 1.35     | 0.32     | 0.52     |
| ESI-32  | 0.57 | 20.95  | 0.00      | 12.40    | Y      | sand      | sand      | sand      | low          | sand       | 1.42     | 0.38     | 0.22     |
| ESI-33  | 3.79 | 3.49   | 0.00      | 41.40    | Y      | sand      | grass     | grass     | low          | LC         | 1.31     | 0.38     | 0.51     |
| SSI-5   | 0.40 | 0.00   | 0.00      | 7.31     | N      | grass     | grass     | grass     | none         | sand       | 0.68     | 0.52     | 0.78     |
| SSI-6   | 1.48 | 4.52   | 0.00      | 36.75    | Y      | sand      | grass     | grass     | low          | LC         | 1.32     | 0.43     | 0.39     |
| SSI-8   | 2.26 | 61.29  | 0.00      | 15.23    | Y      | sand      | sand      | water     | low          | sand       | 1.35     | 0.23     | 0.34     |
| SSI-13  | 1.13 | 12.20  | 0.00      | 13.40    | N      | grass     | water     | water     | low          | water      | 1.08     | 0.44     | 0.57     |
| SSI-15  | 1.49 | 54.33  | 0.00      | 4.81     | Y      | sand      | sand      | water     | low          | slope      | 1.61     | 0.36     | 0.30     |
| SSI-17  | 0.86 | 23.88  | 0.00      | 16.55    | N      | grass     | grass     | grass     | none         | sand       | 1.19     | 0.42     | 0.52     |
| SSI-20  | 0.84 | 59.57  | 0.00      | 7.32     | N      | sand      | sand      | grass     | none         | sand       | 1.85     | 0.48     | 0.34     |
| SSI-24  | 1.24 | 5.47   | 83.86     | 41.56    | Y      | grass     | grass     | grass     | low          | LC         | 1.12     | 0.43     | 0.55     |
| SSI-25  | 0.46 | 61.74  | 0.00      | 0.53     | N      | grass     | grass     | grass     | none         | sand       | 0.61     | 0.43     | 0.78     |
| SSI-36  | 0.51 | 16.17  | 0.00      | 0.00     | N      | grass     | water     | water     | intermediate | water      | 0.58     | 0.64     | 0.98     |

Save as CSV.

# save as csv
  write.csv(curr_df,paste0(tab.dir,'evaluation_sites_FORMATTED.csv'), row.names = FALSE)
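
The summary tables from Sections 4 and 5 (e.g. reg, isl, cv.sum, cur_hist and pup) can be written to the summaries folder in the same way (the file names below are our own choices):

# save summary tables (optional)
  write.csv(reg, paste0(sum.dir,'sites_per_DOC_region.csv'), row.names = FALSE)
  write.csv(pup, paste0(sum.dir,'sites_per_pupping_location.csv'), row.names = FALSE)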

8. Save workspace

# save workspace to load later if needed
  save.image("G:/R/sites_eval.RData")