Fit size-spectrum models

For areas separately, using Edwards et al’s MLEbin method

# Load libraries (install first if needed)
library(tidyverse); theme_set(theme_classic(base_size = 12))
#> Warning: package 'tidyr' was built under R version 4.0.5
library(tidylog)
library(RColorBrewer)
#> Warning: package 'RColorBrewer' was built under R version 4.0.5
library(patchwork)
library(sizeSpectra)
library(brms)
#> Warning: package 'Rcpp' was built under R version 4.0.5
library(nlstools)
library(bayesplot)
library(tidylog)
library(tidybayes)
library(RColorBrewer)
library(modelr)
library(viridis)
library(ggridges)
library(grDevices)

# Print package versions for versions
sessionInfo() 
#> R version 4.0.2 (2020-06-22)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS  10.16
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
#> 
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#>  [1] ggridges_0.5.2      viridis_0.5.1       viridisLite_0.4.1  
#>  [4] modelr_0.1.8        tidybayes_3.0.1     bayesplot_1.7.2    
#>  [7] nlstools_1.0-2      brms_2.17.0         Rcpp_1.0.8         
#> [10] sizeSpectra_1.0.0.0 patchwork_1.1.1     RColorBrewer_1.1-3 
#> [13] tidylog_1.0.2       forcats_0.5.1       stringr_1.4.1      
#> [16] dplyr_1.0.10        purrr_0.3.4         readr_2.1.1        
#> [19] tidyr_1.2.0         tibble_3.1.8        ggplot2_3.3.6      
#> [22] tidyverse_1.3.2    
#> 
#> loaded via a namespace (and not attached):
#>   [1] readxl_1.3.1         backports_1.3.0      plyr_1.8.6          
#>   [4] igraph_1.2.5         svUnit_1.0.6         splines_4.0.2       
#>   [7] crosstalk_1.1.0.1    TH.data_1.1-1        rstantools_2.1.1    
#>  [10] inline_0.3.15        digest_0.6.30        htmltools_0.5.3     
#>  [13] rsconnect_0.8.16     fansi_1.0.3          magrittr_2.0.3      
#>  [16] checkmate_2.0.0      googlesheets4_1.0.0  tzdb_0.2.0          
#>  [19] RcppParallel_5.1.4   matrixStats_0.61.0   xts_0.12-0          
#>  [22] sandwich_3.0-2       prettyunits_1.1.1    colorspace_2.0-3    
#>  [25] rvest_1.0.3          ggdist_3.0.0         haven_2.5.1         
#>  [28] xfun_0.33            callr_3.7.2          crayon_1.4.2        
#>  [31] jsonlite_1.8.0       lme4_1.1-26          survival_3.1-12     
#>  [34] zoo_1.8-8            glue_1.6.2           gtable_0.3.1        
#>  [37] gargle_1.2.0         emmeans_1.5.0        V8_3.2.0            
#>  [40] distributional_0.2.2 pkgbuild_1.3.1       rstan_2.21.2        
#>  [43] abind_1.4-5          scales_1.2.1         mvtnorm_1.1-3       
#>  [46] DBI_1.1.1            miniUI_0.1.1.1       xtable_1.8-4        
#>  [49] clisymbols_1.2.0     stats4_4.0.2         StanHeaders_2.21.0-7
#>  [52] DT_0.15              htmlwidgets_1.5.4    httr_1.4.4          
#>  [55] threejs_0.3.3        arrayhelpers_1.1-0   posterior_1.1.0     
#>  [58] ellipsis_0.3.2       pkgconfig_2.0.3      loo_2.3.1           
#>  [61] farver_2.1.1         sass_0.4.2           dbplyr_2.1.1        
#>  [64] utf8_1.2.2           tidyselect_1.1.2     rlang_1.0.6         
#>  [67] reshape2_1.4.4       later_1.3.0          munsell_0.5.0       
#>  [70] cellranger_1.1.0     tools_4.0.2          cachem_1.0.6        
#>  [73] cli_3.4.1            generics_0.1.2       broom_1.0.1         
#>  [76] evaluate_0.16        fastmap_1.1.0        yaml_2.3.5          
#>  [79] processx_3.7.0       knitr_1.40           fs_1.5.2            
#>  [82] nlme_3.1-148         projpred_2.0.2       mime_0.12           
#>  [85] xml2_1.3.3           compiler_4.0.2       shinythemes_1.1.2   
#>  [88] rstudioapi_0.14      gamm4_0.2-6          curl_4.3.2          
#>  [91] reprex_2.0.1         statmod_1.4.36       bslib_0.4.0         
#>  [94] stringi_1.7.8        ps_1.7.1             Brobdingnag_1.2-6   
#>  [97] lattice_0.20-41      Matrix_1.3-4         nloptr_1.2.2.2      
#> [100] markdown_1.1         shinyjs_1.1          tensorA_0.36.2      
#> [103] vctrs_0.5.0          pillar_1.8.1         lifecycle_1.0.3     
#> [106] jquerylib_0.1.4      bridgesampling_1.0-0 estimability_1.3    
#> [109] httpuv_1.5.5         R6_2.5.1             promises_1.1.1      
#> [112] gridExtra_2.3        codetools_0.2-16     boot_1.3-25         
#> [115] MASS_7.3-51.6        colourpicker_1.0     gtools_3.8.2        
#> [118] assertthat_0.2.1     withr_2.5.0          shinystan_2.5.0     
#> [121] multcomp_1.4-20      mgcv_1.8-31          parallel_4.0.2      
#> [124] hms_1.1.1            grid_4.0.2           minqa_1.2.4         
#> [127] coda_0.19-4          rmarkdown_2.16       googledrive_2.0.0   
#> [130] shiny_1.6.0          lubridate_1.8.0      base64enc_0.1-3     
#> [133] dygraphs_1.1.1.6

# For parallel processing
options(mc.cores = parallel::detectCores()) 

# Load cache
# qwraps2::lazyload_cache_dir(path = "R/analysis/04_size_spectra_fit_cache/html")

Read data

d <- read.csv("data/for_fitting/size_spectrum_dat.csv")

# This is for the size_df below (1 row 1 ind, for calculating mean size later)
df <- read.csv("data/cleaned/catch_BT_FM_1987-2003.csv") %>% mutate(area = Area)
#> mutate: new variable 'area' (character) with 2 unique values and 0% NA

# How many nets in total? (For scaling with effort later)
df %>%
  group_by(netID) %>%
  mutate(n = n()) %>%
  ggplot(aes(n)) +
  geom_histogram()
#> group_by: one grouping variable (netID)
#> mutate (grouped): new variable 'n' (integer) with 185 unique values and 0% NA
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.


df <- df %>% group_by(Area, year) %>% mutate(n_nets_year = length(unique(netID2))) %>% ungroup()
#> group_by: 2 grouping variables (Area, year)
#> mutate (grouped): new variable 'n_nets_year' (integer) with 10 unique values and 0% NA
#> ungroup: no grouping variables

# Test
df %>% filter(year == 1995 & Area == "BT") %>% distinct(netID2)
#> filter: removed 53,506 rows (99%), 524 rows remaining
#> distinct: removed 495 rows (94%), 29 rows remaining

# Now we need to find representative catch sizes
# Plot n per length group (cm)
p1 <- ggplot(df, aes(factor(length_group))) +   
  geom_histogram(stat = "count") +
  facet_wrap(~Area, scales = "free") + # Note the differences in # data
  theme_classic() + 
  coord_cartesian(expand = 0)
#> Warning: Ignoring unknown parameters: binwidth, bins, pad

# Filter out fish > 13 cm
df <- df %>% filter(length_group > 13) %>% as.data.frame()
#> filter: removed 21,128 rows (39%), 32,902 rows remaining

# Plot again
p2 <- ggplot(df, aes(factor(length_group))) +   
  geom_histogram(stat = "count") +
  facet_wrap(~Area, scales = "free") + # Note the differences in # data
  theme_classic() + 
  coord_cartesian(expand = 0)
#> Warning: Ignoring unknown parameters: binwidth, bins, pad

p1/p2


# Before we process data any further, save it because we will use it in this format
# for analysing mean size (i.e. we need 1 row = 1 ind)

size_df <- df
colnames(size_df)
#>  [1] "Area"         "Sektion"      "Station"      "year"         "week"        
#>  [6] "day"          "Längdgr_std"  "length_group" "netID"        "netID2"      
#> [11] "area"         "n_nets_year"

Fit models

dataBintest <- d %>% rename("Number" = "cpue_numbers")
#> rename: renamed one variable (Number)

# Following and modifying this vignette:
# https://htmlpreview.github.io/?https://raw.githubusercontent.com/andrew-edwards/sizeSpectra/master/doc/MEPS_IBTS_MLEbins.html

# Forsmark
dataBin_FM <- dataBintest %>%
  filter(area == "FM") %>%
  select(SpecCode, wmin, wmax, Number)
#> filter: removed 12 rows (50%), 12 rows remaining
#> select: dropped 8 variables (X, area, min_length_group_cm, catch_n, effort_id, …)

  n_FM = sum(dataBin_FM$Number)
  xmin_FM = min(dataBin_FM$wmin)
  xmax_FM = max(dataBin_FM$wmax)
  
  FM_spectra  = calcLike(negLL.fn = negLL.PLB.bins.species,
                         p = -1.5,
                         suppress.warnings = FALSE,
                         vecDiff = 0.5, # Default is 0.5, I get warning though
                         dataBinForLike = dataBin_FM,
                         n = n_FM,
                         xmin = xmin_FM,
                         xmax = xmax_FM)
#> Warning in nlm(f = negLL.fn, p = p, ...): NA/Inf replaced by maximum positive
#> value
  
  FM_spectra_df = data.frame(Year = 2000,
                             xmin = xmin_FM,
                             xmax = xmax_FM,
                             n = n_FM,
                             b = FM_spectra$MLE,
                             confMin = FM_spectra$conf[1],
                             confMax = FM_spectra$conf[2])
  
  
# Biotest
dataBin_BT <- dataBintest %>%
  filter(area == "BT") %>%
  select(SpecCode, wmin, wmax, Number)
#> filter: removed 12 rows (50%), 12 rows remaining
#> select: dropped 8 variables (X, area, min_length_group_cm, catch_n, effort_id, …)

n_BT = sum(dataBin_BT$Number)
xmin_BT = min(dataBin_BT$wmin)
xmax_BT = max(dataBin_BT$wmax)

BT_spectra  = calcLike(negLL.fn = negLL.PLB.bins.species,
                       p = -1.5,
                       suppress.warnings = FALSE,
                       vecDiff = 0.5, # Default is 0.5, I get warning though
                       dataBinForLike = dataBin_BT,
                       n = n_BT,
                       xmin = xmin_BT,
                       xmax = xmax_BT)

BT_spectra_df = data.frame(Year = 2000,
                           xmin = xmin_BT,
                           xmax = xmax_BT,
                           n = n_BT,
                           b = BT_spectra$MLE,
                           confMin = BT_spectra$conf[1],
                           confMax = BT_spectra$conf[2])

FM_spectra_df
BT_spectra_df


#** Loop and extract data for plotting =============================================
# https://htmlpreview.github.io/?https://raw.githubusercontent.com/andrew-edwards/sizeSpectra/master/doc/MEPS_IBTS_recommend.html
# These calculations are to get the required input for the recommended plot
# (see ?sizeSpectra::ISD_bin_plot for the structure). This could maybe be functionalised
# like the plotting function ISD_bin_plot().

# Forsmark ==========================================================================
dataBin_FM$Year <- 2000
dataRecommend.isd_FM = dplyr::select(dataBin_FM, Year, wmin, wmax, Number)

data.year.fm <- dplyr::arrange(dataRecommend.isd_FM, desc(wmin))

sumNumber = sum(data.year.fm$Number)

wmin.vec = data.year.fm$wmin
wmax.vec = data.year.fm$wmax
num.vec  = data.year.fm$Number
  
countGTEwmin = rep(NA, length(num.vec)) # to do a manual count
lowCount = countGTEwmin
highCount = countGTEwmin
  
for(iii in 1:length(countGTEwmin))
  {
    countGTEwmin[iii] = sum( (wmin.vec >= wmin.vec[iii]) * num.vec)
    lowCount[iii]  = sum( (wmin.vec >= wmax.vec[iii]) * num.vec)
    highCount[iii] = sum( (wmax.vec >  wmin.vec[iii]) * num.vec)
  }

data.year.fm = cbind(data.year.fm,
                 "countGTEwmin" = countGTEwmin,
                 "lowCount" = lowCount,
                 "highCount" = highCount)

data.year.fm = dplyr::tbl_df(data.year.fm)
#> Warning: `tbl_df()` was deprecated in dplyr 1.0.0.
#> ℹ Please use `tibble::as_tibble()` instead.
  
xlim.global = c(min(data.year.fm$wmin),
                max(data.year.fm$wmax))   # x-axis limits to be common for all plots

ISD_bin_plot(data.year = data.year.fm,
             b.MLE = FM_spectra_df$b,
             b.confMin = FM_spectra_df$confMin,
             b.confMax = FM_spectra_df$confMax,
             year = 2000,
             xlim = xlim.global,
             xmin = FM_spectra_df$xmin,
             xmax = FM_spectra_df$xmax
)



# Biotest ==========================================================================
dataBin_BT$Year <- 2000

dataRecommend.isd_BT = dplyr::select(dataBin_BT, Year, wmin, wmax, Number)

data.year.bt <- dplyr::arrange(dataRecommend.isd_BT, desc(wmin))

sumNumber = sum(data.year.bt$Number)

wmin.vec = data.year.bt$wmin
wmax.vec = data.year.bt$wmax
num.vec  = data.year.bt$Number

countGTEwmin = rep(NA, length(num.vec)) # to do a manual count
lowCount = countGTEwmin
highCount = countGTEwmin

for(iii in 1:length(countGTEwmin))
{
  countGTEwmin[iii] = sum( (wmin.vec >= wmin.vec[iii]) * num.vec)
  lowCount[iii]  = sum( (wmin.vec >= wmax.vec[iii]) * num.vec)
  highCount[iii] = sum( (wmax.vec >  wmin.vec[iii]) * num.vec)
}

data.year.bt = cbind(data.year.bt,
                  "countGTEwmin" = countGTEwmin,
                  "lowCount" = lowCount,
                  "highCount" = highCount)

data.year.bt = dplyr::tbl_df(data.year.bt)

xlim.global = c(min(data.year.bt$wmin),
                max(data.year.bt$wmax))   # x-axis limits to be common for all plots

ISD_bin_plot(data.year = data.year.bt,
             b.MLE = BT_spectra_df$b,
             b.confMin = BT_spectra_df$confMin,
             b.confMax = BT_spectra_df$confMax,
             year = 2000,
             xlim = xlim.global,
             xmin = BT_spectra_df$xmin,
             xmax = BT_spectra_df$xmax
)

Produce figures

pal <- rev(brewer.pal(n = 6, name = "Paired")[c(2, 6)])

# https://htmlpreview.github.io/?https://raw.githubusercontent.com/andrew-edwards/sizeSpectra/master/doc/MEPS_IBTS_MLEbins.html

FM_spectra = dplyr::tbl_df(FM_spectra_df)
FM_spectra = dplyr::mutate(FM_spectra, stdErr = (abs(confMin-b) + abs(confMax-b))/(2*1.96) )
FM_spectra = dplyr::mutate(FM_spectra, area = "FM")

BT_spectra = dplyr::tbl_df(BT_spectra_df)
BT_spectra = dplyr::mutate(BT_spectra, stdErr = (abs(confMin-b) + abs(confMax-b))/(2*1.96) )
BT_spectra = dplyr::mutate(BT_spectra, area = "BT")

spectra <- rbind(BT_spectra, FM_spectra)

# Now make a ggplot-version to put in the paper as an example
# First put in in a dataframe instead of adding them as lines in the base plot
# BT (warm)
data_year_bt <- data.year.bt
data_year_bt$area <- "Warm"
sumNumber = sum(data_year_bt$Number)

xmin = BT_spectra$xmin
xmax = BT_spectra$xmax
x.PLB = seq(xmin_BT, xmax_BT, length = 10001)
b.MLE = BT_spectra$b
b.confMin = BT_spectra$confMin
b.confMax = BT_spectra$confMax

data_year_bt2 <- data.frame(x.PLB = x.PLB,
                            y.PLB = (1 - pPLB(x = x.PLB, b = b.MLE, xmin = min(x.PLB), xmax = max(x.PLB))) * sumNumber,
                            y.PLB.confMin = (1 - pPLB(x = x.PLB, b = b.confMin, xmin = min(x.PLB), xmax = max(x.PLB))) * sumNumber,
                            y.PLB.confMax = (1 - pPLB(x = x.PLB, b = b.confMax, xmin = min(x.PLB), xmax = max(x.PLB))) * sumNumber,
                            area = "Warm")

# FM (cold)
data_year_fm <- data.year.fm
data_year_fm$area <- "Cold"
sumNumber = sum(data_year_fm$Number)

xmin = FM_spectra$xmin
xmax = FM_spectra$xmax
x.PLB = seq(xmin_FM, xmax_FM, length = 10001)
b.MLE = FM_spectra$b
b.confMin = FM_spectra$confMin
b.confMax = FM_spectra$confMax

data_year_fm2 <- data.frame(x.PLB = x.PLB,
                            y.PLB = (1 - pPLB(x = x.PLB, b = b.MLE, xmin = min(x.PLB), xmax = max(x.PLB))) * sumNumber,
                            y.PLB.confMin = (1 - pPLB(x = x.PLB, b = b.confMin, xmin = min(x.PLB), xmax = max(x.PLB))) * sumNumber,
                            y.PLB.confMax = (1 - pPLB(x = x.PLB, b = b.confMax, xmin = min(x.PLB), xmax = max(x.PLB))) * sumNumber,
                            area = "Cold")

data_year <- rbind(data_year_bt, data_year_fm)
data_year2 <- rbind(data_year_bt2, data_year_fm2)

b_warm <- round(BT_spectra$b, 2) 
b_cold <- round(FM_spectra$b, 2)
b_warm_upr <- round(BT_spectra$confMax, 2)
b_cold_upr <- round(FM_spectra$confMax, 2)
b_warm_lwr <- round(BT_spectra$confMin, 2)
b_cold_lwr <- round(FM_spectra$confMin, 2)

data_year2 <- data_year2 %>% 
  mutate(area_plot = ifelse(area == "Warm", "Heat", "Ref"))
#> mutate: new variable 'area_plot' (character) with 2 unique values and 0% NA

pmle <- ggplot(data_year2) +
  geom_rect(data = data_year,
            aes(xmin = wmin, xmax = wmax, ymin = lowCount, ymax = highCount, fill = area),
            alpha = 0.2) +
  geom_line(aes(x.PLB, y.PLB, color = area_plot), size = 1.3, alpha = 0.8) +
  geom_line(aes(x.PLB, y.PLB.confMin, color = area_plot), linetype = 3, alpha = 0.8) +
  geom_line(aes(x.PLB, y.PLB.confMax, color = area_plot), linetype = 3, alpha = 0.8) +
  scale_color_manual(values = pal, name = "Area") +
  scale_fill_manual(values = rev(pal)) + 
  scale_x_log10() +
  scale_y_log10() +
  guides(fill = "none") + 
  coord_cartesian(ylim = c(0.01, 1000)) +
  labs(x = "Body mass, x [g]", y = "No. individuals \u2265 x") +
  theme(legend.position = c(0.8, 0.9)) +
  guides(color = guide_legend(override.aes = list(linetype = 1, size = 2, alpha = 0.3, color = pal))) +
  theme(text = element_text(size = 12), 
        legend.position = c(0.9, 0.9),
        legend.spacing.y = unit(0, 'cm'),
        legend.key.size = unit(0, "cm"),
        legend.title = element_text(size = 10),
        legend.text = element_text(size = 8))
pmle
#> Warning: Transformation introduced infinite values in continuous y-axis
#> Transformation introduced infinite values in continuous y-axis
#> Transformation introduced infinite values in continuous y-axis
#> Transformation introduced infinite values in continuous y-axis


# Now plot the CI's
BT_spectra
spec_tot <- rbind(BT_spectra, FM_spectra) %>% 
  mutate(Area = ifelse(area == "BT", "Heated", "Reference"))
#> mutate: new variable 'Area' (character) with 2 unique values and 0% NA

est_plot <- ggplot(spec_tot, aes(Area, b, color = Area)) + 
  geom_point(size = 3) +
  scale_color_manual(values = pal) +
  guides(color = "none") +
  labs(y = expression(paste("Size-spectrum exponent ", italic((gamma))))) +
  geom_errorbar(aes(x = Area, ymin = confMin, ymax = confMax), width = 0, size = 0.75)

phist <- 
  size_df %>%
  mutate(Area2 = ifelse(Area == "BT", "Warm", "Cold")) %>% 
  ggplot(., aes(x = length_group, fill = Area2, group = Area2)) +
  stat_count(mapping = aes(x = length_group, y = ..prop.., group = Area2),
             position = position_dodge(), alpha = 0.8) +
  scale_fill_manual(values = rev(pal)) +
  theme_light() + 
  coord_cartesian(expand = 0) +
  labs(x = "Length group [cm]", y = "Proportion") +
  guides(fill = "none") +
  theme(text = element_text(size = 12))
#> mutate: new variable 'Area2' (character) with 2 unique values and 0% NA

# Combine plots
#pmle / (est_plot | phist) + plot_annotation(tag_levels = "A") #+ plot_layout(widths = c(5, 1, 1))

# This is beacuse ggsave is producing an erorr when I save to pdf when using larger or equal-sign
grDevices::cairo_pdf("figures/size_spec.pdf", width = 7.87402, height = 7.87402)

pmle / (est_plot | phist) + plot_annotation(tag_levels = "A")
#> Warning: Transformation introduced infinite values in continuous y-axis
#> Transformation introduced infinite values in continuous y-axis
#> Transformation introduced infinite values in continuous y-axis
#> Transformation introduced infinite values in continuous y-axis

dev.off()
#> quartz_off_screen 
#>                 2