trout_artsobs <- readRDS("../R/output/trout_artsobs.RDS")
trout_survey <- readRDS("../R/output/trout_survey.RDS")
trout_artsobs_df <- readRDS("../R/output/trout_artsobs_df.RDS")
trout_survey_df <- readRDS("../R/output/trout_survey_df.RDS")

1 Looking at the observations

# Empty map theme
empty_theme_map <- theme(                              
  plot.background = element_blank(), 
  panel.grid.major = element_blank(), 
  panel.grid.minor = element_blank(), 
  panel.border = element_blank(), 
  panel.background = element_blank(),
  axis.line = element_blank(),
  axis.ticks = element_blank(),
  axis.title.x = element_blank(), 
  axis.title.y = element_blank(),
  axis.text = element_blank()
)
empty_theme <- theme(                              
  plot.background = element_blank(), 
  panel.grid.major = element_blank(), 
  panel.grid.minor = element_blank(), 
  panel.border = element_blank(), 
  panel.background = element_blank(),
  axis.text.x = element_blank(),
  axis.title.x = element_blank()
)

# Setting up color palette
trutta_colors <- fish(n = 20, option = "Salmo_trutta")
#fishualize(option = "Salmo_trutta")

1.1 Observation map

norway <- ggplot2::map_data("world", region = "Norway(?!:Svalbard)")
norway <- dplyr::setdiff(norway, filter(norway, subregion == "Jan Mayen"))
p1 <- ggplot(data.frame(trout_artsobs)) +
  geom_polygon(data = norway, aes(long, lat, group = group), 
                 color=trutta_colors[11], fill = trutta_colors[11]) +
  geom_point(aes(x = decimalLongitude, y = decimalLatitude), 
               color = trutta_colors[4], alpha = 0.8, size = 1) +
  empty_theme_map +
  ggtitle("Artsobservasjoner")

p2 <- ggplot(data.frame(trout_survey)) +
  geom_polygon(data = norway, aes(long, lat, group = group), 
                 color=trutta_colors[11], fill = trutta_colors[11]) +
  geom_point(aes(x = decimalLongitude, y = decimalLatitude, color = occurrenceStatus), alpha = 0.8, size = 1) +
  scale_color_manual(values=c(trutta_colors[15],trutta_colors[4])) +
  theme(legend.position = "none") +
  empty_theme_map +
  ggtitle("Fish status survey of nordic lakes")
  
ggarrange(p1, p2)

Above, I have plotted all the observations for both data sets. Note that the “Fish status survey of nordic lakes” data set is presence/absence, presences are in red in both plots, while absences are in grey when reported.

To get a better impression of observation density across the map, we look at a hexagon map.

p1 <- ggplot(data.frame(trout_artsobs), aes(x = decimalLongitude, y = decimalLatitude)) +
  #geom_polygon(data = norway, aes(long, lat, group = group)) +
  geom_hex() +
  empty_theme_map +
  scale_fill_fish(option = "Salmo_trutta", begin = 0.52, end = 0.9, direction = 1)+
  ggtitle("Artsobservasjoner")

p2 <- ggplot(data.frame(trout_survey), aes(x = decimalLongitude, y = decimalLatitude)) +
  #geom_polygon(data = norway, aes(long, lat, group = group)) +
  geom_hex() +
  empty_theme_map +
  scale_fill_fish(option = "Salmo_trutta", begin = 0.52, end = 0.9, direction = 1)+
  ggtitle("Fish status survey of nordic lakes")

ggarrange(p1, p2)

1.2 Number of observations per year

time_counts <- count(trout_artsobs_df, year)

ggplot(time_counts, aes(x = year, y = n)) + 
  geom_line(size = 1, color = trutta_colors[7]) +
  theme_minimal() +
  theme(axis.title = element_blank()) +
  geom_vline(xintercept = 1996, linetype = "dotted", color = trutta_colors[13], size = 1)

Number of observations per year for the Artsobservasjoner data. The dotted vertical line is in 1996, which is the year all the survey observations are made.

1.3 Number of observations per square kilometer by county

fylke_areal <- c(4918, 9155, 14912, 48631, 27398, 15438, 14469, 38472, 25192, 454, 4187, 9377, 18623, 15298, 24879, 41898, 7279, 2225)

trout_artsobs_df$county.y <- trout_artsobs_df$county.y %>% recode("Sør-Trøndelag" = "Trøndelag", "Nord-Trøndelag" = "Trøndelag")
dense_fylke <- cbind(count(trout_artsobs_df, vars = county.y), fylke_areal) %>% mutate(obs_per_area = n/fylke_areal)

ggplot(dense_fylke, aes(x = vars, y = obs_per_area)) + 
  geom_bar(stat = "identity", color=trutta_colors[2], fill=trutta_colors[2], width = 0.7) + 
  #geom_text(aes(label = obs_in_county), hjust = -0.5, color="black", size=4) +
  coord_flip() +
 # ylab("Observations per km^2") +
  theme_minimal() + 
  theme(text = element_text(size = 15),
        axis.title.y = element_blank())+
  empty_theme

Number of observations per square km, by county, in the Artsobservasjoner data.

2 Explanatory variables

The possible explanatory variables are

What is intersting to look at here? Distribution of variables in space, but also distribution of the variables themselves.

Covariates <- readRDS("../R/output/Covariates.RDS")
Cov_long <- gather(data.frame(Covariates), key = variable, value = value, area_km2:log_catchment)

2.1 Explanatory variables on a map

plot_exp_var <- function(var){
  ggplot(Cov_long %>% dplyr::select(variable, value, decimalLatitude, decimalLongitude) %>% 
                filter(variable==var)) +
    geom_polygon(data = norway, aes(long, lat, group = group), 
                 color = "grey", fill = "grey") +
    geom_point(aes(x = decimalLongitude, y = decimalLatitude, color = value),
             alpha = 0.8, size = 0.3) +
    scale_color_fish(option = "Salmo_trutta") +
    #scale_color_gradient(low = trutta_colors[], high = "darkblue")
    empty_theme_map +
    ggtitle(var)
}
Cov_names <- unique(Cov_long$variable)
var_plots <- lapply(Cov_names, plot_exp_var)

ggarrange(plotlist = var_plots, nrow = 4, ncol = 3)

2.2 Explanatory variables summary

descr(data.frame(Covariates)[,4:13], stats = "fivenum", transpose = TRUE)

Descriptive Statistics

Min Q1 Median Q3 Max
area_km2 0.00 0.01 0.02 0.06 369.47
catchment_area_km2 0.00 0.23 0.76 2.86 40501.43
distance_to_road 0.00 782.00 2402.00 5337.00 37263.00
eurolst_bio10 47.33 102.37 109.12 123.02 168.69
HFP 0.00 1.25 2.36 5.00 47.28
log_area -12.00 -4.39 -3.77 -2.82 5.91
log_catchment -7.13 -1.46 -0.28 1.05 10.61
log_perimeter 2.40 6.30 6.71 7.29 13.75
perimeter_m 11.00 547.00 823.00 1459.00 937991.00
SCI 1.01 1.19 1.32 1.54 4.61

2.3 Explanatory variables histograms

To display sort of the same info as the table.

ggplot(Cov_long, aes(x = value)) +
    geom_histogram(fill = trutta_colors[2], color = trutta_colors[2]) +
      facet_wrap(~variable, scales = 'free_x', nrow = 2) +
  theme_minimal() + 
  theme(axis.title = element_blank())

3 Exploring model output

Spatial fields, predictions, comparing preformance of the four models

Projection <- CRS("+proj=longlat +ellps=WGS84")
norwayfill <- map("world", "norway", fill=TRUE, plot=FALSE, 
                  ylim=c(58,72), xlim=c(4,32))
IDs <- sapply(strsplit(norwayfill$names, ":"), function(x) x[1])
norway.poly <- map2SpatialPolygons(norwayfill, IDs = IDs, 
                                   proj4string = Projection)
Meshpars <- list(cutoff=0.08, max.edge=c(1, 3), offset=c(1,1))
Mesh <- MakeSpatialRegion(data=NULL, bdry=norway.poly, meshpars=Meshpars,
                          proj = Projection)
source("../R/Model_visualization_functions.R")

3.1 Comparing different models

res <- readRDS("../R/output/cv_output_4mods.RDS")

dic_values <- matrix(unlist(res), ncol = 5)
av_dic <- rowMeans(dic_values)

(please ignore the actual numbers, I do not trust them)

1 spatial field 2 spatial fields
Env. covariates 6800.9047929
860.2044
Env. + effort covariates 479.2709594 824.1562596

Above: Result of block cross-validation, comparing four different models. The numbers are the marginal deviance for each model.

3.2 Looking at final model

model_final <- readRDS("../R/output/model_final_1.RDS")

summary(model_final$model)
## 
## Call:
##    c("INLA::inla(formula = Formula, family = c(\"poisson\", \"binomial\"), 
##    ", " data = INLA::inla.stack.data(stck), E = 
##    INLA::inla.stack.data(stck)$e, ", " offset = offset.formula, Ntrials = 
##    INLA::inla.stack.data(stck)$Ntrials, ", " verbose = verbose, 
##    control.compute = list(waic = waic, dic = dic), ", " control.predictor 
##    = list(A = INLA::inla.stack.A(stck), link = NULL, ", " compute = TRUE), 
##    control.family = list(list(link = \"log\"), ", " list(link = 
##    \"cloglog\")), control.results = list(return.marginals.random = FALSE, 
##    ", " return.marginals.predictor = FALSE), control.fixed = 
##    control.fixed)" ) 
## Time used:
##     Pre = 13.2, Running = 1263, Post = 2.05, Total = 1278 
## Fixed effects:
##                    mean     sd 0.025quant 0.5quant 0.975quant   mode kld
## int.survey        6.240 10.196    -20.415    6.632     27.383  5.939   0
## int.artsobs       5.448 11.794    -23.716    6.028     29.526  5.395   0
## decimalLongitude -0.194  0.114     -0.447   -0.183     -0.002 -0.156   0
## decimalLatitude  -0.051  0.030     -0.119   -0.047     -0.001 -0.041   0
## log_area          0.015  0.021     -0.025    0.015      0.055  0.015   0
## perimeter_m       0.000  0.000      0.000    0.000      0.000  0.000   0
## eurolst_bio10    -0.002  0.004     -0.009   -0.002      0.006 -0.002   0
## SCI               0.029  0.114     -0.197    0.029      0.252  0.031   0
## distance_to_road  0.000  0.000      0.000    0.000      0.000  0.000   0
## HFP               0.000  0.005     -0.010    0.000      0.010  0.000   0
## 
## Random effects:
##   Name     Model
##     shared_field SPDE2 model
##    bias_field SPDE2 model
## 
## Model hyperparameters:
##                          mean    sd 0.025quant 0.5quant 0.975quant  mode
## Theta1 for shared_field  1.16 0.287      0.608     1.16       1.74  1.14
## Theta2 for shared_field -2.87 0.759     -4.442    -2.83      -1.45 -2.72
## Theta1 for bias_field    1.39 0.321      0.830     1.37       2.08  1.27
## Theta2 for bias_field   -2.50 0.713     -4.108    -2.41      -1.37 -2.04
## 
## Expected number of effective parameters(stdev): 28.37(3.57)
## Number of equivalent replicates : 130.97 
## 
## Marginal log-Likelihood:  -961.96 
## Posterior marginals for the linear predictor and
##  the fitted values are computed

We can plot the predictions (not shown) as well as the spatial field means and standard deviations:

spat_fields_df <- proj_random_field(model_final$model, sp_polygon = norway.poly, mesh = Mesh$mesh)

ggplot(spat_fields_df) +
  geom_polygon(data = norway, aes(long, lat, group = group), 
               color="white", fill = "white") +
    geom_raster(aes(x = decimalLongitude, y = decimalLatitude, fill = mean)) +
  scale_fill_fish(option = "Salmo_trutta") +
  empty_theme_map +
  facet_wrap(~ field)

ggplot(spat_fields_df) +
  geom_polygon(data = norway, aes(long, lat, group = group), 
               color="white", fill = "white") +
    geom_raster(aes(x = decimalLongitude, y = decimalLatitude, fill = sd)) +
  scale_fill_fish(option = "Salmo_trutta") +
  empty_theme_map +
  facet_wrap(~ field)