trout_artsobs <- readRDS("../R/output/trout_artsobs.RDS")
trout_survey <- readRDS("../R/output/trout_survey.RDS")
trout_artsobs_df <- readRDS("../R/output/trout_artsobs_df.RDS")
trout_survey_df <- readRDS("../R/output/trout_survey_df.RDS")
# Empty map theme
empty_theme_map <- theme(
plot.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text = element_blank()
)
empty_theme <- theme(
plot.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_blank(),
axis.title.x = element_blank()
)
# Setting up color palette
trutta_colors <- fish(n = 20, option = "Salmo_trutta")
#fishualize(option = "Salmo_trutta")
norway <- ggplot2::map_data("world", region = "Norway(?!:Svalbard)")
norway <- dplyr::setdiff(norway, filter(norway, subregion == "Jan Mayen"))
p1 <- ggplot(data.frame(trout_artsobs)) +
geom_polygon(data = norway, aes(long, lat, group = group),
color=trutta_colors[11], fill = trutta_colors[11]) +
geom_point(aes(x = decimalLongitude, y = decimalLatitude),
color = trutta_colors[4], alpha = 0.8, size = 1) +
empty_theme_map +
ggtitle("Artsobservasjoner")
p2 <- ggplot(data.frame(trout_survey)) +
geom_polygon(data = norway, aes(long, lat, group = group),
color=trutta_colors[11], fill = trutta_colors[11]) +
geom_point(aes(x = decimalLongitude, y = decimalLatitude, color = occurrenceStatus), alpha = 0.8, size = 1) +
scale_color_manual(values=c(trutta_colors[15],trutta_colors[4])) +
theme(legend.position = "none") +
empty_theme_map +
ggtitle("Fish status survey of nordic lakes")
ggarrange(p1, p2)
Above, I have plotted all the observations for both data sets. Note that the “Fish status survey of nordic lakes” data set is presence/absence, presences are in red in both plots, while absences are in grey when reported.
To get a better impression of observation density across the map, we look at a hexagon map.
p1 <- ggplot(data.frame(trout_artsobs), aes(x = decimalLongitude, y = decimalLatitude)) +
#geom_polygon(data = norway, aes(long, lat, group = group)) +
geom_hex() +
empty_theme_map +
scale_fill_fish(option = "Salmo_trutta", begin = 0.52, end = 0.9, direction = 1)+
ggtitle("Artsobservasjoner")
p2 <- ggplot(data.frame(trout_survey), aes(x = decimalLongitude, y = decimalLatitude)) +
#geom_polygon(data = norway, aes(long, lat, group = group)) +
geom_hex() +
empty_theme_map +
scale_fill_fish(option = "Salmo_trutta", begin = 0.52, end = 0.9, direction = 1)+
ggtitle("Fish status survey of nordic lakes")
ggarrange(p1, p2)
time_counts <- count(trout_artsobs_df, year)
ggplot(time_counts, aes(x = year, y = n)) +
geom_line(size = 1, color = trutta_colors[7]) +
theme_minimal() +
theme(axis.title = element_blank()) +
geom_vline(xintercept = 1996, linetype = "dotted", color = trutta_colors[13], size = 1)
Number of observations per year for the Artsobservasjoner data. The dotted vertical line is in 1996, which is the year all the survey observations are made.
fylke_areal <- c(4918, 9155, 14912, 48631, 27398, 15438, 14469, 38472, 25192, 454, 4187, 9377, 18623, 15298, 24879, 41898, 7279, 2225)
trout_artsobs_df$county.y <- trout_artsobs_df$county.y %>% recode("Sør-Trøndelag" = "Trøndelag", "Nord-Trøndelag" = "Trøndelag")
dense_fylke <- cbind(count(trout_artsobs_df, vars = county.y), fylke_areal) %>% mutate(obs_per_area = n/fylke_areal)
ggplot(dense_fylke, aes(x = vars, y = obs_per_area)) +
geom_bar(stat = "identity", color=trutta_colors[2], fill=trutta_colors[2], width = 0.7) +
#geom_text(aes(label = obs_in_county), hjust = -0.5, color="black", size=4) +
coord_flip() +
# ylab("Observations per km^2") +
theme_minimal() +
theme(text = element_text(size = 15),
axis.title.y = element_blank())+
empty_theme
Number of observations per square km, by county, in the Artsobservasjoner data.
The possible explanatory variables are
area_km2
: the area of the lake, in square kilometersperimeter_m
: the perimeter of the lake, in metersdistance_to_road
: the distance to the closest roadeurolst_bio10
: average temperature of the warmest annual quarter, in degrees celcius multiplied by 10catchment_area_km2
: the catchment area of the lake, in square kilometersSCI
: shoreline complexity indexHFP
: human footprint indexWhat is intersting to look at here? Distribution of variables in space, but also distribution of the variables themselves.
Covariates <- readRDS("../R/output/Covariates.RDS")
Cov_long <- gather(data.frame(Covariates), key = variable, value = value, area_km2:log_catchment)
plot_exp_var <- function(var){
ggplot(Cov_long %>% dplyr::select(variable, value, decimalLatitude, decimalLongitude) %>%
filter(variable==var)) +
geom_polygon(data = norway, aes(long, lat, group = group),
color = "grey", fill = "grey") +
geom_point(aes(x = decimalLongitude, y = decimalLatitude, color = value),
alpha = 0.8, size = 0.3) +
scale_color_fish(option = "Salmo_trutta") +
#scale_color_gradient(low = trutta_colors[], high = "darkblue")
empty_theme_map +
ggtitle(var)
}
Cov_names <- unique(Cov_long$variable)
var_plots <- lapply(Cov_names, plot_exp_var)
ggarrange(plotlist = var_plots, nrow = 4, ncol = 3)
descr(data.frame(Covariates)[,4:13], stats = "fivenum", transpose = TRUE)
Descriptive Statistics
Min | Q1 | Median | Q3 | Max | |
---|---|---|---|---|---|
area_km2 | 0.00 | 0.01 | 0.02 | 0.06 | 369.47 |
catchment_area_km2 | 0.00 | 0.23 | 0.76 | 2.86 | 40501.43 |
distance_to_road | 0.00 | 782.00 | 2402.00 | 5337.00 | 37263.00 |
eurolst_bio10 | 47.33 | 102.37 | 109.12 | 123.02 | 168.69 |
HFP | 0.00 | 1.25 | 2.36 | 5.00 | 47.28 |
log_area | -12.00 | -4.39 | -3.77 | -2.82 | 5.91 |
log_catchment | -7.13 | -1.46 | -0.28 | 1.05 | 10.61 |
log_perimeter | 2.40 | 6.30 | 6.71 | 7.29 | 13.75 |
perimeter_m | 11.00 | 547.00 | 823.00 | 1459.00 | 937991.00 |
SCI | 1.01 | 1.19 | 1.32 | 1.54 | 4.61 |
To display sort of the same info as the table.
ggplot(Cov_long, aes(x = value)) +
geom_histogram(fill = trutta_colors[2], color = trutta_colors[2]) +
facet_wrap(~variable, scales = 'free_x', nrow = 2) +
theme_minimal() +
theme(axis.title = element_blank())
Spatial fields, predictions, comparing preformance of the four models
Projection <- CRS("+proj=longlat +ellps=WGS84")
norwayfill <- map("world", "norway", fill=TRUE, plot=FALSE,
ylim=c(58,72), xlim=c(4,32))
IDs <- sapply(strsplit(norwayfill$names, ":"), function(x) x[1])
norway.poly <- map2SpatialPolygons(norwayfill, IDs = IDs,
proj4string = Projection)
Meshpars <- list(cutoff=0.08, max.edge=c(1, 3), offset=c(1,1))
Mesh <- MakeSpatialRegion(data=NULL, bdry=norway.poly, meshpars=Meshpars,
proj = Projection)
source("../R/Model_visualization_functions.R")
res <- readRDS("../R/output/cv_output_4mods.RDS")
dic_values <- matrix(unlist(res), ncol = 5)
av_dic <- rowMeans(dic_values)
(please ignore the actual numbers, I do not trust them)
1 spatial field | 2 spatial fields | |
---|---|---|
Env. covariates | 6800.9047929 |
|
Env. + effort covariates | 479.2709594 | 824.1562596 |
Above: Result of block cross-validation, comparing four different models. The numbers are the marginal deviance for each model.
model_final <- readRDS("../R/output/model_final_1.RDS")
summary(model_final$model)
##
## Call:
## c("INLA::inla(formula = Formula, family = c(\"poisson\", \"binomial\"),
## ", " data = INLA::inla.stack.data(stck), E =
## INLA::inla.stack.data(stck)$e, ", " offset = offset.formula, Ntrials =
## INLA::inla.stack.data(stck)$Ntrials, ", " verbose = verbose,
## control.compute = list(waic = waic, dic = dic), ", " control.predictor
## = list(A = INLA::inla.stack.A(stck), link = NULL, ", " compute = TRUE),
## control.family = list(list(link = \"log\"), ", " list(link =
## \"cloglog\")), control.results = list(return.marginals.random = FALSE,
## ", " return.marginals.predictor = FALSE), control.fixed =
## control.fixed)" )
## Time used:
## Pre = 13.2, Running = 1263, Post = 2.05, Total = 1278
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## int.survey 6.240 10.196 -20.415 6.632 27.383 5.939 0
## int.artsobs 5.448 11.794 -23.716 6.028 29.526 5.395 0
## decimalLongitude -0.194 0.114 -0.447 -0.183 -0.002 -0.156 0
## decimalLatitude -0.051 0.030 -0.119 -0.047 -0.001 -0.041 0
## log_area 0.015 0.021 -0.025 0.015 0.055 0.015 0
## perimeter_m 0.000 0.000 0.000 0.000 0.000 0.000 0
## eurolst_bio10 -0.002 0.004 -0.009 -0.002 0.006 -0.002 0
## SCI 0.029 0.114 -0.197 0.029 0.252 0.031 0
## distance_to_road 0.000 0.000 0.000 0.000 0.000 0.000 0
## HFP 0.000 0.005 -0.010 0.000 0.010 0.000 0
##
## Random effects:
## Name Model
## shared_field SPDE2 model
## bias_field SPDE2 model
##
## Model hyperparameters:
## mean sd 0.025quant 0.5quant 0.975quant mode
## Theta1 for shared_field 1.16 0.287 0.608 1.16 1.74 1.14
## Theta2 for shared_field -2.87 0.759 -4.442 -2.83 -1.45 -2.72
## Theta1 for bias_field 1.39 0.321 0.830 1.37 2.08 1.27
## Theta2 for bias_field -2.50 0.713 -4.108 -2.41 -1.37 -2.04
##
## Expected number of effective parameters(stdev): 28.37(3.57)
## Number of equivalent replicates : 130.97
##
## Marginal log-Likelihood: -961.96
## Posterior marginals for the linear predictor and
## the fitted values are computed
We can plot the predictions (not shown) as well as the spatial field means and standard deviations:
spat_fields_df <- proj_random_field(model_final$model, sp_polygon = norway.poly, mesh = Mesh$mesh)
ggplot(spat_fields_df) +
geom_polygon(data = norway, aes(long, lat, group = group),
color="white", fill = "white") +
geom_raster(aes(x = decimalLongitude, y = decimalLatitude, fill = mean)) +
scale_fill_fish(option = "Salmo_trutta") +
empty_theme_map +
facet_wrap(~ field)
ggplot(spat_fields_df) +
geom_polygon(data = norway, aes(long, lat, group = group),
color="white", fill = "white") +
geom_raster(aes(x = decimalLongitude, y = decimalLatitude, fill = sd)) +
scale_fill_fish(option = "Salmo_trutta") +
empty_theme_map +
facet_wrap(~ field)