Load libraries

library(tidyverse); theme_set(theme_classic(base_size = 12))
#> ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
#> ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
#> ✓ tibble  3.1.5     ✓ dplyr   1.0.7
#> ✓ tidyr   1.1.4     ✓ stringr 1.4.0
#> ✓ readr   2.1.1     ✓ forcats 0.5.1
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> x dplyr::filter() masks stats::filter()
#> x dplyr::lag()    masks stats::lag()
library(tidylog)
#> 
#> Attaching package: 'tidylog'
#> The following objects are masked from 'package:dplyr':
#> 
#>     add_count, add_tally, anti_join, count, distinct, distinct_all,
#>     distinct_at, distinct_if, filter, filter_all, filter_at, filter_if,
#>     full_join, group_by, group_by_all, group_by_at, group_by_if,
#>     inner_join, left_join, mutate, mutate_all, mutate_at, mutate_if,
#>     relocate, rename, rename_all, rename_at, rename_if, rename_with,
#>     right_join, sample_frac, sample_n, select, select_all, select_at,
#>     select_if, semi_join, slice, slice_head, slice_max, slice_min,
#>     slice_sample, slice_tail, summarise, summarise_all, summarise_at,
#>     summarise_if, summarize, summarize_all, summarize_at, summarize_if,
#>     tally, top_frac, top_n, transmute, transmute_all, transmute_at,
#>     transmute_if, ungroup
#> The following objects are masked from 'package:tidyr':
#> 
#>     drop_na, fill, gather, pivot_longer, pivot_wider, replace_na,
#>     spread, uncount
#> The following object is masked from 'package:stats':
#> 
#>     filter
library(RColorBrewer)
library(patchwork)
library(sizeSpectra)
library(brms)
#> Loading required package: Rcpp
#> Warning: package 'Rcpp' was built under R version 4.0.5
#> Loading 'brms' package (version 2.17.0). Useful instructions
#> can be found by typing help('brms'). A more detailed introduction
#> to the package is available through vignette('brms_overview').
#> 
#> Attaching package: 'brms'
#> The following object is masked from 'package:stats':
#> 
#>     ar
library(nlstools)
#> 
#> 'nlstools' has been loaded.
#> IMPORTANT NOTICE: Most nonlinear regression models and data set examples
#> related to predictive microbiolgy have been moved to the package 'nlsMicrobio'
library(bayesplot)
#> This is bayesplot version 1.7.2
#> - Online documentation and vignettes at mc-stan.org/bayesplot
#> - bayesplot theme set to bayesplot::theme_default()
#>    * Does _not_ affect other ggplot2 plots
#>    * See ?bayesplot_theme_set for details on theme setting
library(tidybayes)
#> 
#> Attaching package: 'tidybayes'
#> The following objects are masked from 'package:brms':
#> 
#>     dstudent_t, pstudent_t, qstudent_t, rstudent_t
library(modelr)
library(viridis)
#> Loading required package: viridisLite
#> 
#> Attaching package: 'viridis'
#> The following object is masked from 'package:viridisLite':
#> 
#>     viridis.map
library(ggridges)
#> 
#> Attaching package: 'ggridges'
#> The following objects are masked from 'package:tidybayes':
#> 
#>     scale_point_color_continuous, scale_point_color_discrete,
#>     scale_point_colour_continuous, scale_point_colour_discrete,
#>     scale_point_fill_continuous, scale_point_fill_discrete,
#>     scale_point_size_continuous

# Print package versions
sessionInfo()
#> R version 4.0.2 (2020-06-22)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS  10.16
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
#> 
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#>  [1] ggridges_0.5.2      viridis_0.5.1       viridisLite_0.4.0  
#>  [4] modelr_0.1.8        tidybayes_3.0.1     bayesplot_1.7.2    
#>  [7] nlstools_1.0-2      brms_2.17.0         Rcpp_1.0.8         
#> [10] sizeSpectra_1.0.0.0 patchwork_1.1.1     RColorBrewer_1.1-2 
#> [13] tidylog_1.0.2       forcats_0.5.1       stringr_1.4.0      
#> [16] dplyr_1.0.7         purrr_0.3.4         readr_2.1.1        
#> [19] tidyr_1.1.4         tibble_3.1.5        ggplot2_3.3.5      
#> [22] tidyverse_1.3.1    
#> 
#> loaded via a namespace (and not attached):
#>   [1] readxl_1.3.1         backports_1.3.0      plyr_1.8.6          
#>   [4] igraph_1.2.5         svUnit_1.0.6         splines_4.0.2       
#>   [7] crosstalk_1.1.0.1    rstantools_2.1.1     inline_0.3.15       
#>  [10] digest_0.6.28        htmltools_0.5.2      rsconnect_0.8.16    
#>  [13] fansi_0.5.0          magrittr_2.0.1       checkmate_2.0.0     
#>  [16] tzdb_0.2.0           RcppParallel_5.1.4   matrixStats_0.61.0  
#>  [19] xts_0.12-0           prettyunits_1.1.1    colorspace_2.0-2    
#>  [22] rvest_1.0.2          ggdist_3.0.0         haven_2.3.1         
#>  [25] xfun_0.27            callr_3.7.0          crayon_1.4.2        
#>  [28] jsonlite_1.7.2       lme4_1.1-26          zoo_1.8-8           
#>  [31] glue_1.6.2           gtable_0.3.0         emmeans_1.5.0       
#>  [34] V8_3.2.0             distributional_0.2.2 pkgbuild_1.2.0      
#>  [37] rstan_2.21.2         abind_1.4-5          scales_1.1.1        
#>  [40] mvtnorm_1.1-3        DBI_1.1.1            miniUI_0.1.1.1      
#>  [43] xtable_1.8-4         clisymbols_1.2.0     stats4_4.0.2        
#>  [46] StanHeaders_2.21.0-7 DT_0.15              htmlwidgets_1.5.1   
#>  [49] httr_1.4.2           threejs_0.3.3        arrayhelpers_1.1-0  
#>  [52] posterior_1.1.0      ellipsis_0.3.2       pkgconfig_2.0.3     
#>  [55] loo_2.3.1            farver_2.1.0         sass_0.3.1          
#>  [58] dbplyr_2.1.1         utf8_1.2.2           tidyselect_1.1.1    
#>  [61] rlang_1.0.2          reshape2_1.4.4       later_1.3.0         
#>  [64] munsell_0.5.0        cellranger_1.1.0     tools_4.0.2         
#>  [67] cli_3.1.0            generics_0.1.2       broom_0.7.10        
#>  [70] evaluate_0.14        fastmap_1.1.0        yaml_2.2.1          
#>  [73] processx_3.5.2       knitr_1.36           fs_1.5.0            
#>  [76] nlme_3.1-148         mime_0.12            projpred_2.0.2      
#>  [79] xml2_1.3.2           compiler_4.0.2       shinythemes_1.1.2   
#>  [82] rstudioapi_0.13      curl_4.3.2           gamm4_0.2-6         
#>  [85] reprex_2.0.1         statmod_1.4.36       bslib_0.2.4         
#>  [88] stringi_1.7.5        ps_1.5.0             Brobdingnag_1.2-6   
#>  [91] lattice_0.20-41      Matrix_1.3-4         nloptr_1.2.2.2      
#>  [94] markdown_1.1         shinyjs_1.1          tensorA_0.36.2      
#>  [97] vctrs_0.3.8          pillar_1.6.4         lifecycle_1.0.1     
#> [100] jquerylib_0.1.4      bridgesampling_1.0-0 estimability_1.3    
#> [103] httpuv_1.5.5         R6_2.5.1             promises_1.1.1      
#> [106] gridExtra_2.3        codetools_0.2-16     boot_1.3-25         
#> [109] colourpicker_1.0     MASS_7.3-51.6        gtools_3.8.2        
#> [112] assertthat_0.2.1     withr_2.4.2          shinystan_2.5.0     
#> [115] mgcv_1.8-31          parallel_4.0.2       hms_1.1.1           
#> [118] grid_4.0.2           minqa_1.2.4          coda_0.19-4         
#> [121] rmarkdown_2.11       shiny_1.6.0          lubridate_1.8.0     
#> [124] base64enc_0.1-3      dygraphs_1.1.1.6

Read and add final touches to data

df <- read.csv("data/cleaned/catch_BT_FM_1987-2003.csv")

# How many nets in total? (For scaling with effort later)
df %>%
  group_by(netID) %>%
  mutate(n = n()) %>%
  ggplot(., aes(netID2, n)) +
  geom_histogram(stat = "identity")
#> group_by: one grouping variable (netID)
#> mutate (grouped): new variable 'n' (integer) with 185 unique values and 0% NA
#> Warning: Ignoring unknown parameters: binwidth, bins, pad


df <- df %>% group_by(Area, year) %>% mutate(n_nets_year = length(unique(netID2))) %>% ungroup()
#> group_by: 2 grouping variables (Area, year)
#> mutate (grouped): new variable 'n_nets_year' (integer) with 10 unique values and 0% NA
#> ungroup: no grouping variables

# Test: number of unique nets in one example year and area
df %>% filter(year == 1995 & Area == "BT") %>% distinct(netID2)
#> filter: removed 53,506 rows (99%), 524 rows remaining
#> distinct: removed 495 rows (94%), 29 rows remaining

# Now we need to find representative catch sizes
# Plot n per length group (cm)
p1 <- ggplot(df, aes(factor(length_group))) +   
  geom_histogram(stat = "count") +
  facet_wrap(~Area, scales = "free") + # Note the differences in # data
  theme_classic() + 
  coord_cartesian(expand = 0)
#> Warning: Ignoring unknown parameters: binwidth, bins, pad

# Keep only fish in length groups > 13 cm
df <- df %>% filter(length_group > 13) %>% as.data.frame()
#> filter: removed 21,128 rows (39%), 32,902 rows remaining

# Plot again
p2 <- ggplot(df, aes(factor(length_group))) +   
  geom_histogram(stat = "count") +
  facet_wrap(~Area, scales = "free") + # Note the differences in # data
  theme_classic() + 
  coord_cartesian(expand = 0)
#> Warning: Ignoring unknown parameters: binwidth, bins, pad

p1/p2


# Before we process the data any further, keep a copy in this format because we will
# use it for analysing mean size (i.e. we need 1 row = 1 individual)

size_df <- df

# Now we want to process the data a bit further... Following Edwards' sizeSpectra package,
# we want the data in the following format (a sketch mapping our processed data to these
# column names follows the df4 construction below):
# Year  SpecCode    LngtClass   Number  LWa     LWb     bodyMass    Biomass
# Hence we need to calculate the CPUE for each size class (the vignette does this per
# year; here we aggregate across years, see below)
# https://htmlpreview.github.io/?https://raw.githubusercontent.com/andrew-edwards/sizeSpectra/master/doc/MEPS_IBTS_MLEbins.html
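# As a minimal illustration of the CPUE idea used below (the cpue() helper and the
# numbers are purely illustrative, not part of the pipeline): catch per unit effort
# is simply the catch divided by the effort, here the number of nets
cpue <- function(catch, n_nets) catch / n_nets
cpue(100, 25) # e.g. 100 individuals over 25 nets = 4 individuals per net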

# Do NOT group by year here... this is the version aggregated across years
# Group by Area and length group, summarize and get n()
df2 <- df %>%
  group_by(Area, length_group) %>% 
  summarise(catch_n = n()) %>% # Get catch as a column: the number of rows (individuals) in each length group and area
  ungroup() %>% 
  mutate(effort_id = paste(Area, sep = ".")) %>% # effort_id is just Area here (a single grouping variable)
  as.data.frame()
#> group_by: 2 grouping variables (Area, length_group)
#> summarise: now 26 rows and 3 columns, one group variable remaining (Area)
#> ungroup: no grouping variables
#> mutate: new variable 'effort_id' (character) with 2 unique values and 0% NA

# Now we need to get the effort back in there
df_effort <- df %>%
  mutate(effort_id = paste(Area, sep = ".")) %>% 
  select(effort_id, n_nets_year) %>% 
  distinct(effort_id, .keep_all = TRUE) %>% 
  as.data.frame()
#> mutate: new variable 'effort_id' (character) with 2 unique values and 0% NA
#> select: dropped 10 variables (Area, Sektion, Station, year, week, …)
#> distinct: removed 32,900 rows (>99%), 2 rows remaining

# Now do a left_join
df3 <- left_join(df2, df_effort, by = "effort_id") %>% as.data.frame()
#> left_join: added one column (n_nets_year)
#>            > rows only in x    0
#>            > rows only in y  ( 0)
#>            > matched rows     26
#>            >                 ====
#>            > rows total       26

# Now do some additional calculations (weights etc.) to match the data structure in the sizeSpectra package
# Length-weight (LW) parameters from FishBase (accessed 2020.09.24)
a <- 0.01 
b <- 3.08
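
# Quick sanity check of the length-weight conversion w = a * l^b
# (14 cm is an arbitrary example length, not a value taken from the data)
a * 14^b # roughly 34 g for a 14 cm fish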

df4 <- df3 %>% 
  ungroup() %>% 
  rename("area" = "Area",
         "min_length_group_cm" = "length_group") %>% # What we have now is the minimum length in each class
  mutate(max_length_group_cm = min_length_group_cm + 2.4, # Get max length in bin (2.5 cm length-classes)
         wmin = a*min_length_group_cm^b,     # Get min mass in bin
         wmax = a*max_length_group_cm^b) %>% # Get max mass in bin
  mutate(cpue_numbers = catch_n/n_nets_year, # Get numbers CPUE: divide by the previously created n_nets_year, the number of unique net IDs in each area and year
         cpue_biom = (catch_n*((wmin + wmax)/2))/n_nets_year) %>% # Get biomass CPUE, use mean of mass in size range
  mutate(SpecCode = "Perch")
#> ungroup: no grouping variables
#> rename: renamed 2 variables (area, min_length_group_cm)
#> mutate: new variable 'max_length_group_cm' (double) with 13 unique values and 0% NA
#>         new variable 'wmin' (double) with 13 unique values and 0% NA
#>         new variable 'wmax' (double) with 13 unique values and 0% NA
#> mutate: new variable 'cpue_numbers' (double) with 26 unique values and 0% NA
#>         new variable 'cpue_biom' (double) with 26 unique values and 0% NA
#> mutate: new variable 'SpecCode' (character) with one unique value and 0% NA

# Test that we get 1 unique row per size class and area
df4 %>%
  group_by(area, min_length_group_cm) %>%
  summarise(n = n()) %>% 
  filter(n == 1) %>% 
  as.data.frame()
#> group_by: 2 grouping variables (area, min_length_group_cm)
#> summarise: now 26 rows and 3 columns, one group variable remaining (area)
#> filter (grouped): no rows removed
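
# A sketch (our own column mapping, not part of the original pipeline) of how df4 could
# be renamed to resemble the column naming listed above from the sizeSpectra MLEbins
# vignette; here Number is taken to be the CPUE in numbers, and there is no Year column
# because the data were aggregated across years
df4 %>%
  transmute(SpecCode,
            LngtClass = min_length_group_cm,
            Number = cpue_numbers,
            LWa = a,
            LWb = b,
            wmin,
            wmax) %>%
  head()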

# Plot the log biomass CPUE as a function of log weight
ggplot(df4, aes(log(wmax), log(cpue_biom))) + 
  stat_smooth(se = FALSE) +
  geom_point() +
  facet_wrap(~ area) +
  scale_color_viridis(discrete = TRUE) + 
  coord_cartesian(expand = 0) + 
  theme(aspect.ratio = 1)
#> `geom_smooth()` using method = 'loess' and formula 'y ~ x'


# Plot the total number of catches by length group in this processed data
# Compare with p2; it should be the same plot
ggplot(df4, aes(min_length_group_cm, catch_n)) + 
  geom_bar(stat = "identity") + 
  facet_wrap(~ area, scales = "free") +
  theme_classic() + 
  coord_cartesian(expand = 0)


p2


# Now that we have CPUE and not raw COUNT, we can check whether the cut-off relative to
# the size of maximum catch is appropriate
p3 <- ggplot(df4, aes(factor(round(wmin, digits = 3)), cpue_biom)) + 
  geom_bar(stat = "identity") + 
  facet_wrap(~ area, scales = "free") +
  theme_classic() + 
  coord_cartesian(expand = 0)

# Keep only size classes with wmin > 50 g
df4 <- df4 %>% filter(wmin > 50) %>% as.data.frame()
#> filter: removed 2 rows (8%), 24 rows remaining

# Plot again
p4 <- ggplot(df4, aes(factor(round(wmin, digits = 3)), cpue_biom)) + 
  geom_bar(stat = "identity") + 
  facet_wrap(~ area, scales = "free") +
  theme_classic() + 
  coord_cartesian(expand = 0)

p3/p4

Save data

write.csv(df4, "data/for_fitting/size_spectrum_dat.csv")
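
# Note (a suggestion, not part of the original pipeline): write.csv() writes row names
# by default, which adds an unnamed index column to the file; if that is not wanted,
# the call could be:
# write.csv(df4, "data/for_fitting/size_spectrum_dat.csv", row.names = FALSE)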