Â
This is supporting information for the Methods in Ecology and Evolution article entitled, Integrated SDM database: enhancing the relevance and utility of species distribution models in conservation management, by Veronica F. Frans*, Amélie A. Augé, Jim Fyfe, Yuqian Zhang, Nathan McNally, Hendrik Edelhoff, Niko Balkenhol, & Jan O. Engler. Please contact the corresponding author (*) for any inquiries.
Here, we exemplify how to use the final integrated SDM database from Appendix S6 to query sites of interest and extract or summarise data fields across sites. We also show how to visualise these queries. In particular, we focus on sites near current New Zealand sea lion (Phocarctos hookeri; NZSL) pupping locations.
This script is intended for use by the database’s end-users to gather further information on specific locations, or to assist in site prioritisations.
The final products from this script are as follows:
Data tables (CSV format) summarising sites (mainland in general and sites near pupping locations).
A map of the sites located near pupping locations and their proximity to human impact areas.
The script presented here was done using R (version 4.0.5; R Core Team 2021) and its packages.
# libraries
library("raster") # raster data
library("rgdal") # input/output; projections; reading ASCII files
library("tidyverse") # plots and working with complex tables
library("RColorBrewer") # colours for graphics
library("plyr") # sort data
library("reshape") # sort data
library("reshape2") # sort data
library("rgeos") # polygon and point conversions
library('tmap') # visualizing maps
library("cowplot") # multiplots
library("grid") # multiplots
library("png") # display exported images in Rmarkdown
We create and use multiple folders to organise our inputs and outputs. Similar directories can be made anywhere by only changing the root directory object, dir
.
# Root directory
dir <- c('G:\\R\\NZSL_MSSDM_MCA_2019_NZ\\')
# Data directory
dat.dir <- paste0(dir,'data')
# Intermediate data and layers
dir.create(paste0(dir,'data\\intermediate'),recursive=TRUE)
int.dir <- paste0(dir,'data\\intermediate') # no '\\' here for read/writeOGR
# Intermediate layers that have been clipped
dir.create(paste0(int.dir,'\\clipped'),recursive=TRUE)
clp.dir <- paste0(int.dir,'\\clipped') # no '\\' here for read/writeOGR
# Final layers
dir.create(paste0(dir,'layers'),recursive=TRUE)
lay.dir <- paste0(dir,'layers') # no '\\' here for read/writeOGR
# Final tables
dir.create(paste0(dir,'tables'),recursive=TRUE)
tab.dir <- paste0(dir,'tables\\')
# Summary tables
dir.create(paste0(dir,'tables\\summaries'),recursive=TRUE)
sum.dir <- paste0(dir,'tables\\summaries\\')
# Final figures
dir.create(paste0(dir,'figures'),recursive=TRUE)
fig.dir <- paste0(dir,'figures\\')
Colourblind-friendly colours for plotting.
# colours for state values (0-111)
state_cols <-c('#BBBBBB','#DC050C','#882E72','#1965B0','#6195CF','#4EB265',
'#90C987','#F7CB45','#EE8026')
# colours for thresholds or binary plots
thresh_cols <- c('#DDCC77','#CC3311')
# colours
cat_cols <- c("#a65628","#f781bf")
cont_cols <- c("#ff7f00","#ffff33","#377eb8","#e41a1c","#4daf4a")
slope_cols <- c("#984ea3")
var_cols <- c("#ff7f00", # cliff
"#ffff33", # coast
"#377eb8", # forest
"#e41a1c", # grass
"#4daf4a", # land cover
"#984ea3", # sand
"#a65628", # slope
"#f781bf") # water
# function to explore colours for tmap:
#tmaptools::palette_explorer()
Calculate the mode of a categorical feature.
# calculate the mode of a categorical feature
calc.mode <- function(x) {
uniq_vals <- unique(x)
uniq_vals[which.max(tabulate(match(x, uniq_vals)))]
}
Extract the name of an inputted variable to convert into a string and use for naming files that are saved.
# turn variable name into a string (for use in file naming)
get.name <- function(x){
y <- deparse(substitute(x))
return(y)
}
Load points or polygons and project. The default projection is set to NZDG2000/New Zealand Transverse Mercator 2000 (EPSG: 2193).
# load point/polygon and project
get.shp <- function(p_name,
# defaults
polydir=dat.dir, prj=c("+init=epsg:2193")){
# read shapefile
pol <- readOGR(polydir, paste0(p_name))
# project and return point/polygon
pol <- spTransform(pol, CRS(paste0(prj)))
return(pol)
}
Load rasters and project. The default projection is set to NZDG2000/New Zealand Transverse Mercator 2000 (EPSG: 2193).
# load raster and project
get.ras <- function(r_name,
# defaults
ras_dir=dat.dir, prj=c("+init=epsg:2193")){
# read raster
r <- raster(paste0(ras_dir,"\\",r_name))
# project and return raster
crs(r) <- CRS(paste0(prj))
return(r)
}
Load suitable site polygons (minimum 35 females) created in Appendix S6 under the name, NZSL_integrated_SDM_database.shp.
# load and ensure projections
sites <- get.shp('NZSL_integrated_SDM_database', polydir = lay.dir)
# show column names
colnames(sites@data)
## [1] "site_ID" "main_isld" "region" "DOC_region" "area"
## [6] "DOC_code" "DOC_name" "DOC_size" "DOC_pc" "S1_area_pc"
## [11] "S2_area_pc" "S3_area_pc" "S1_limits" "S2_limits" "S3_limits"
## [16] "S1_uncrt" "S2_uncrt" "S3_uncrt" "MESS_class" "dissim_var"
## [21] "hum_im_pc" "rd_sl_mi" "rd_unsl_mi" "fences" "graze_pc"
## [26] "in_watr_mi" "in_watr_me" "in_watr_mx" "pine_pc" "curr_NZSL"
## [31] "histr_NZSL" "X" "Y" "id"
Base map items for visualisation.
# NZ polygons
NZ_polygon <- get.shp('NZ_polygon_dissolved')
# NZ inland water bodies
NZ_water <- get.shp('water_polygons_all_NZ')
Current NZSL pupping location data.
# get shapefiles
NZSL_current <- get.shp('current_NZSL_locations_9-20-2019',polydir = int.dir)
# keep name columns only
NZSL_current <- (NZSL_current)[5]
# add column for location type
NZSL_current$loc_type <- '1994-2019 (Dec - Mar)'
Human impact polygons from the multi-criteria decision analysis (MCDA).
# load human impact areas again
human_impact_areas <- get.shp('human_impact_areas',polydir = int.dir)
# change column name
human_impact_areas$hum_imp <- 'human impact areas'
Planted pine forest polygons.
# planted pine forests
pine <- get.shp('planted_pine')
# change column name
pine$SUBID_2016 <- 'planted pine forest'
First, we show how to summarise all sites, sorted by DOC (Department of Conservation) management regions and North versus South Island.
# summaries per region
reg <- ddply(sites@data,.(DOC_region),
summarize,
count=length(site_ID), #total number of sites per region
ttl_area=sum(area), #total area size in km
DOC_perc=(sum(DOC_size, na.rm=TRUE)/sum(area))*100, #% area under DOC
mean_size=mean(area), #mean size across region sites
sd_size=sd(area), #sd size across region sites
min_size=min(area), #minimum size across region sites
max_size=max(area) #maximum size across region sites
) %>%
mutate_if(is.numeric, round, digits=2) #round the outputs
# show here
knitr::kable(reg)
DOC_region | count | ttl_area | DOC_perc | mean_size | sd_size | min_size | max_size |
---|---|---|---|---|---|---|---|
Central North Island | 40 | 68.55 | 6.19 | 1.71 | 1.14 | 0.43 | 4.92 |
Eastern South Island | 33 | 49.46 | 22.08 | 1.50 | 2.15 | 0.36 | 11.16 |
Hauraki-Waikato-Taranaki | 45 | 63.87 | 7.58 | 1.42 | 1.22 | 0.36 | 6.94 |
Lower North Island | 42 | 81.95 | 7.16 | 1.95 | 2.23 | 0.36 | 9.68 |
Northern North Island | 100 | 275.62 | 14.44 | 2.76 | 5.98 | 0.35 | 49.64 |
Northern South Island | 48 | 86.09 | 10.12 | 1.79 | 4.35 | 0.38 | 30.14 |
Southern South Island | 36 | 74.95 | 28.22 | 2.08 | 2.87 | 0.37 | 14.39 |
Western South Island | 51 | 97.70 | 35.50 | 1.92 | 1.86 | 0.43 | 8.12 |
# summaries per island
isl <- ddply(sites@data,.(main_isld),
summarize,
count=length(site_ID), #total number of sites per island
ttl_area=sum(area), #total area size in km
DOC_perc=(sum(DOC_size, na.rm=TRUE)/sum(area))*100, #% area under DOC
mean_size=mean(area), #mean size across island sites
sd_size=sd(area), #sd size across island sites
min_size=min(area), #minimum size across island sites
max_size=max(area) #maximum size across island sites
) %>%
mutate_if(is.numeric, round, digits=2) #round the outputs
# show here
knitr::kable(isl)
main_isld | count | ttl_area | DOC_perc | mean_size | sd_size | min_size | max_size |
---|---|---|---|---|---|---|---|
North Island | 227 | 489.99 | 11.17 | 2.16 | 4.17 | 0.35 | 49.64 |
South Island | 168 | 308.20 | 24.48 | 1.83 | 3.00 | 0.36 | 30.14 |
Here, we can summarise the range of uncertainty for each state prediction.
# summaries for sites near historic locations
cv.sum <- ddply(sites@data,.(),
summarize,
mean_S1=mean(S1_uncrt), #mean CV across island sites
mean_S2=mean(S2_uncrt), #mean CV across island sites
mean_S3=mean(S3_uncrt), #mean CV across island sites
sd_S1=sd(S1_uncrt), #sd CV across island sites
sd_S2=sd(S2_uncrt), #sd CV across island sites
sd_S3=sd(S3_uncrt), #sd CV across island sites
min_S1=min(S1_uncrt), #minimum CV across island sites
min_S2=min(S2_uncrt), #minimum CV across island sites
min_S3=min(S3_uncrt), #minimum CV across island sites
max_S1=max(S1_uncrt), #maximum CV across island sites
max_S2=max(S2_uncrt), #maximum CV across island sites
max_S3=max(S3_uncrt) #maximum CV across island sites
) %>%
mutate_if(is.numeric, round, digits=2) #round the outputs
# transform
cv.sum <- melt(cv.sum)
cv.sum <- cv.sum[,-1]
colnames(cv.sum)[2] <- 'CV_percent'
# show here
knitr::kable(cv.sum)
variable | CV_percent |
---|---|
mean_S1 | 1.12 |
mean_S2 | 0.46 |
mean_S3 | 0.59 |
sd_S1 | 0.38 |
sd_S2 | 0.12 |
sd_S3 | 0.28 |
min_S1 | 0.49 |
min_S2 | 0.13 |
min_S3 | 0.20 |
max_S1 | 2.20 |
max_S2 | 0.79 |
max_S3 | 1.59 |
# summaries per region
cur_hist <- ddply(sites@data,.(DOC_region),
summarize,
ttl_curr=sum(!is.na(curr_NZSL)), #number sites within curr.
ttl_hist=sum(!is.na(histr_NZSL)), #number sites within hist.
perc_curr=sum(!is.na(curr_NZSL))/
length(site_ID), #% sites within current breeding locations
perc_hist=sum(!is.na(histr_NZSL))/
length(site_ID) #% sites within historical breeding locs
) %>%
mutate_if(is.numeric, round, digits=2) #round the outputs
# show here
knitr::kable(cur_hist)
DOC_region | ttl_curr | ttl_hist | perc_curr | perc_hist |
---|---|---|---|---|
Central North Island | 0 | 0 | 0.00 | 0.00 |
Eastern South Island | 9 | 0 | 0.27 | 0.00 |
Hauraki-Waikato-Taranaki | 0 | 0 | 0.00 | 0.00 |
Lower North Island | 0 | 1 | 0.00 | 0.02 |
Northern North Island | 0 | 6 | 0.00 | 0.06 |
Northern South Island | 0 | 4 | 0.00 | 0.08 |
Southern South Island | 10 | 0 | 0.28 | 0.00 |
Western South Island | 0 | 0 | 0.00 | 0.00 |
Only select sites where the column curr_NZSL has data (not NA).
# subset the sites within current pupping location ranges
sites_curr_loc <- sites[!is.na(sites@data$curr_NZSL),]
From this, we can then make the following short queries:
# how many sites?
length(sites_curr_loc)
## [1] 19
# list names of sites
select(sites_curr_loc@data, DOC_region, site_ID)
Since the curr_NZSL column is a semi-colon-separated (;) list of multiple pupping site location names, we first separate them into duplicate rows. Then we summarise by pupping location.
# split into mulple rows
curr_long <- separate_rows(sites_curr_loc@data, curr_NZSL, sep = "; ",
convert = TRUE)
# convert to factor
curr_long$curr_NZSL <- as.factor(curr_long$curr_NZSL)
# summaries of pupping locations
pup <- ddply(curr_long,.(curr_NZSL),
summarize,
ttl_curr=length(curr_NZSL) #number sites within curr.
)
# show here
knitr::kable(pup)
curr_NZSL | ttl_curr |
---|---|
Akatore Creek | 3 |
Allans Beach | 3 |
Boulder Beach | 2 |
Cabbage Point | 1 |
Catlins Lake | 1 |
Hoopers Inlet | 3 |
Kaka Point | 4 |
North Arm | 1 |
Nugget Point | 2 |
Porpoise Bay | 2 |
Port Pegasus/Pikihatiti | 1 |
Purakaunui Bay | 1 |
Purehurehu Point | 3 |
Sandfly Bay | 4 |
Smaills Beach | 1 |
South Arm | 1 |
Surat Bay | 1 |
Tahakopa Bay | 3 |
Victory Beach | 2 |
Warrington | 4 |
This table can also be viewed as a bar chart.
# colours
colourCount = length(unique(pup$curr_NZSL))
getPalette = colorRampPalette(brewer.pal(9, "Spectral"))
# plot site frequencies per pupping location
freq_sites <- ggplot(pup) +
aes(x=curr_NZSL, y=ttl_curr, fill=factor(curr_NZSL)) +
geom_bar(stat='identity') +
scale_fill_manual(values = getPalette(colourCount)) +
ylab('count') + xlab('current pupping locations') +
ggtitle('Number of sites within 10km of known pupping locations') +
theme(axis.text.x=element_text(angle = 45, vjust = .9, hjust = .9),
legend.position = 'none')
# display here
freq_sites
Here is a quick view of the sites.
# plot sites
plot(sites_curr_loc)
A better way to plot these sites is shown in the next section.
Get extent of current NZSL pupping locations, which will be used for the map.
# get extent
extent(sites_curr_loc)
## class : Extent
## xmin : 1190875
## xmax : 1424325
## ymin : 4758650
## ymax : 4946625
Create a background polygon that is clipped to this area. We use the above extent to first create a raster and then a polygon.
# create background polygon
bkg <- as(raster::extent(c(1190875,1454325,4758650,4956625)), "SpatialPolygons")
proj4string(bkg) <- paste0("+init=epsg:2193 +proj=tmerc +lat_0=0 +lon_0=173 +k=0.9996",
" +x_0=1600000 +y_0=10000000 +ellps=GRS80 ",
"+towgs84=0,0,0,0,0,0,0 +units=m +no_defs")
bkg <- spTransform(bkg, CRS("+init=epsg:2193"))
Make a base map with the background polygon as the extent, and the NZ and inland water polygons as the overlain layers.
# set mode
tmap_mode("plot")
# base map
# background polygon
bkg_map <- tm_shape(bkg) +
tm_borders('white', alpha = 0) +
tm_layout(frame=FALSE, bg.color = NA)
# NZ polygon
NZ_map <- tm_shape(NZ_polygon) +
tm_polygons(col='#DDDDDD', border.alpha = 0) +
tm_legend(show=FALSE)
# inland water bodies
wtr_map <- tm_shape(NZ_water) +
tm_fill(col='#33BBEE',
palette = '#33BBEE', alpha = 1) +
tm_legend(show=FALSE)
# combine
base_map <- bkg_map + NZ_map + wtr_map
We can view the base map here.
# display here
base_map
Make a map layer of human impact area polygons.
# human impact areas
hum_map <- tm_shape(human_impact_areas) +
tm_fill(col='hum_imp',
palette='#EE7733',alpha = 1,
title = '', legend.show = FALSE)
We can view it here, lain over the base map.
# display here
base_map + hum_map
Make a map layer of suitable sites polygons.
# make suitable sites polygon layers
suit_map <- tm_shape(sites_curr_loc) +
tm_polygons(border.col='#000000', lwd=1, alpha = 0, border.alpha = 1,
title = 'suitable sites', legend.show = FALSE)
Make a map layer of current pupping locations.
# points for current sighting locations
nzsl_pts <- tm_shape(NZSL_current) +
tm_dots(col = 'loc_type', size = 0.08,
palette=c('#882E72'), shape = 17,
title = 'known female/pup locations', legend.show = FALSE)
Create legend of all the objects.
# legend only
leg <- bkg_map +
# suitable sites
tm_add_legend(type = c("symbol"),
labels = 'Suitable sites',
col = '#000000',
size = .5, shape = 0,
title = '') +
# NZSL sightings
tm_add_legend(type = c("symbol"),
labels = 'Current pupping sites',
col = '#882E72',#'#33BBEE',
size = .3, shape = 17,#19,
title = '') +
# human impacts
tm_add_legend(type = c("symbol"),
labels = 'Human impact areas',
col = '#EE7733',#'#EE6677',
alpha = 1,
size = .5, shape = 15,
title = '') +
# inland water bodies
tm_add_legend(type = c("symbol"),
labels = 'Inland water bodies',
col = '#33BBEE',
alpha = 1,
size = .5, shape = 15,
title = '') +
# layout
tm_layout(legend.show = TRUE,
legend.position = c('right','bottom'),
frame=FALSE,
bg.color = NA)
Combine all map layers and legend into one map.
site_map <- # maps
base_map + hum_map + nzsl_pts + suit_map +
# legend
leg +
# scalebar
tm_scale_bar(breaks = c(0,10,20), size = 0.5,
position = c(.4,0))
Save map.
# save image ("plot" mode)
tmap_save(site_map, filename = paste0(fig.dir,"current_sites_zoom.png"),
height = 4, width=5, units = 'in', dpi=600)
Display here.
knitr::include_graphics(paste0(fig.dir,"current_sites_zoom.png"))
Make a new dataframe of certain columns within the database and extract as a CSV table (these were used for Table 2 in the corresponding manuscript).
# dataframe select by column
curr_df <- select(sites_curr_loc@data,
c(site_ID, # site name information
area, # site size information
DOC_pc, # existing management information
hum_im_pc, graze_pc, fences, # human impacts
S1_limits, S2_limits, S3_limits, # restoration features
MESS_class, dissim_var, # model uncertainty
S1_uncrt, S2_uncrt, S3_uncrt
))
# change NAs to 0
curr_df$DOC_pc[is.na(curr_df$DOC_pc)] <- 0
curr_df$graze_pc[is.na(curr_df$graze_pc)] <- 0
Change some of the data inside to compress the table.
# Change names
curr_df$fences <- mapvalues(curr_df$fences,
from=c('absent','present'),
to=c('N','Y'))
# Remove words
curr_df <- data.frame(lapply(curr_df,
function(x) {gsub(" distance","",x,ignore.case = TRUE)}))
curr_df <- data.frame(lapply(curr_df,
function(x) {gsub("inland ","",x,ignore.case = TRUE)}))
curr_df <- data.frame(lapply(curr_df,
function(x) {gsub("land cover","LC",x,ignore.case = TRUE)}))
# inspect final table
knitr::kable(curr_df)
site_ID | area | DOC_pc | hum_im_pc | graze_pc | fences | S1_limits | S2_limits | S3_limits | MESS_class | dissim_var | S1_uncrt | S2_uncrt | S3_uncrt |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
ESI-21 | 1.76 | 3.43 | 33.82 | 10.69 | N | sand | grass | coast | low | slope | 1.27 | 0.36 | 0.26 |
ESI-22 | 0.71 | 0.00 | 0.00 | 35.16 | N | sand | sand | coast | low | LC | 1.28 | 0.28 | 0.38 |
ESI-23 | 0.45 | 0.00 | 0.00 | 0.07 | N | grass | grass | grass | none | slope | 0.75 | 0.57 | 0.74 |
ESI-24 | 0.79 | 0.00 | 100.00 | 0.55 | Y | grass | water | water | low | water | 0.63 | 0.58 | 0.82 |
ESI-25 | 2.11 | 0.00 | 0.00 | 1.78 | Y | sand | sand | grass | low | slope | 1.40 | 0.43 | 0.36 |
ESI-26 | 0.37 | 5.83 | 0.00 | 16.12 | N | grass | grass | grass | low | sand | 0.96 | 0.40 | 0.59 |
ESI-31 | 0.48 | 0.84 | 0.00 | 9.99 | Y | sand | grass | grass | low | sand | 1.35 | 0.32 | 0.52 |
ESI-32 | 0.57 | 20.95 | 0.00 | 12.40 | Y | sand | sand | sand | low | sand | 1.42 | 0.38 | 0.22 |
ESI-33 | 3.79 | 3.49 | 0.00 | 41.40 | Y | sand | grass | grass | low | LC | 1.31 | 0.38 | 0.51 |
SSI-5 | 0.40 | 0.00 | 0.00 | 7.31 | N | grass | grass | grass | none | sand | 0.68 | 0.52 | 0.78 |
SSI-6 | 1.48 | 4.52 | 0.00 | 36.75 | Y | sand | grass | grass | low | LC | 1.32 | 0.43 | 0.39 |
SSI-8 | 2.26 | 61.29 | 0.00 | 15.23 | Y | sand | sand | water | low | sand | 1.35 | 0.23 | 0.34 |
SSI-13 | 1.13 | 12.20 | 0.00 | 13.40 | N | grass | water | water | low | water | 1.08 | 0.44 | 0.57 |
SSI-15 | 1.49 | 54.33 | 0.00 | 4.81 | Y | sand | sand | water | low | slope | 1.61 | 0.36 | 0.30 |
SSI-17 | 0.86 | 23.88 | 0.00 | 16.55 | N | grass | grass | grass | none | sand | 1.19 | 0.42 | 0.52 |
SSI-20 | 0.84 | 59.57 | 0.00 | 7.32 | N | sand | sand | grass | none | sand | 1.85 | 0.48 | 0.34 |
SSI-24 | 1.24 | 5.47 | 83.86 | 41.56 | Y | grass | grass | grass | low | LC | 1.12 | 0.43 | 0.55 |
SSI-25 | 0.46 | 61.74 | 0.00 | 0.53 | N | grass | grass | grass | none | sand | 0.61 | 0.43 | 0.78 |
SSI-36 | 0.51 | 16.17 | 0.00 | 0.00 | N | grass | water | water | intermediate | water | 0.58 | 0.64 | 0.98 |
Save as CSV.
# save as csv
write.csv(curr_df,paste0(tab.dir,'evaluation_sites_FORMATTED.csv'), row.names = FALSE)
# save workspace to load later if needed
save.image("G:/R/sites_eval.RData")