Load libraries
library(tidyverse); theme_set(theme_classic(base_size = 12))
#> ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
#> ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
#> ✓ tibble 3.1.5 ✓ dplyr 1.0.7
#> ✓ tidyr 1.1.4 ✓ stringr 1.4.0
#> ✓ readr 2.1.1 ✓ forcats 0.5.1
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> x dplyr::filter() masks stats::filter()
#> x dplyr::lag() masks stats::lag()
library(tidylog)
#>
#> Attaching package: 'tidylog'
#> The following objects are masked from 'package:dplyr':
#>
#> add_count, add_tally, anti_join, count, distinct, distinct_all,
#> distinct_at, distinct_if, filter, filter_all, filter_at, filter_if,
#> full_join, group_by, group_by_all, group_by_at, group_by_if,
#> inner_join, left_join, mutate, mutate_all, mutate_at, mutate_if,
#> relocate, rename, rename_all, rename_at, rename_if, rename_with,
#> right_join, sample_frac, sample_n, select, select_all, select_at,
#> select_if, semi_join, slice, slice_head, slice_max, slice_min,
#> slice_sample, slice_tail, summarise, summarise_all, summarise_at,
#> summarise_if, summarize, summarize_all, summarize_at, summarize_if,
#> tally, top_frac, top_n, transmute, transmute_all, transmute_at,
#> transmute_if, ungroup
#> The following objects are masked from 'package:tidyr':
#>
#> drop_na, fill, gather, pivot_longer, pivot_wider, replace_na,
#> spread, uncount
#> The following object is masked from 'package:stats':
#>
#> filter
library(RColorBrewer)
library(patchwork)
library(sizeSpectra)
library(brms)
#> Loading required package: Rcpp
#> Warning: package 'Rcpp' was built under R version 4.0.5
#> Loading 'brms' package (version 2.17.0). Useful instructions
#> can be found by typing help('brms'). A more detailed introduction
#> to the package is available through vignette('brms_overview').
#>
#> Attaching package: 'brms'
#> The following object is masked from 'package:stats':
#>
#> ar
library(nlstools)
#>
#> 'nlstools' has been loaded.
#> IMPORTANT NOTICE: Most nonlinear regression models and data set examples
#> related to predictive microbiology have been moved to the package 'nlsMicrobio'
library(bayesplot)
#> This is bayesplot version 1.7.2
#> - Online documentation and vignettes at mc-stan.org/bayesplot
#> - bayesplot theme set to bayesplot::theme_default()
#> * Does _not_ affect other ggplot2 plots
#> * See ?bayesplot_theme_set for details on theme setting
library(tidybayes)
#>
#> Attaching package: 'tidybayes'
#> The following objects are masked from 'package:brms':
#>
#> dstudent_t, pstudent_t, qstudent_t, rstudent_t
library(modelr)
library(viridis)
#> Loading required package: viridisLite
#>
#> Attaching package: 'viridis'
#> The following object is masked from 'package:viridisLite':
#>
#> viridis.map
library(ggridges)
#>
#> Attaching package: 'ggridges'
#> The following objects are masked from 'package:tidybayes':
#>
#> scale_point_color_continuous, scale_point_color_discrete,
#> scale_point_colour_continuous, scale_point_colour_discrete,
#> scale_point_fill_continuous, scale_point_fill_discrete,
#> scale_point_size_continuous
# Print package versions
sessionInfo()
#> R version 4.0.2 (2020-06-22)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS 10.16
#>
#> Matrix products: default
#> BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
#>
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] ggridges_0.5.2 viridis_0.5.1 viridisLite_0.4.0
#> [4] modelr_0.1.8 tidybayes_3.0.1 bayesplot_1.7.2
#> [7] nlstools_1.0-2 brms_2.17.0 Rcpp_1.0.8
#> [10] sizeSpectra_1.0.0.0 patchwork_1.1.1 RColorBrewer_1.1-2
#> [13] tidylog_1.0.2 forcats_0.5.1 stringr_1.4.0
#> [16] dplyr_1.0.7 purrr_0.3.4 readr_2.1.1
#> [19] tidyr_1.1.4 tibble_3.1.5 ggplot2_3.3.5
#> [22] tidyverse_1.3.1
#>
#> loaded via a namespace (and not attached):
#> [1] readxl_1.3.1 backports_1.3.0 plyr_1.8.6
#> [4] igraph_1.2.5 svUnit_1.0.6 splines_4.0.2
#> [7] crosstalk_1.1.0.1 rstantools_2.1.1 inline_0.3.15
#> [10] digest_0.6.28 htmltools_0.5.2 rsconnect_0.8.16
#> [13] fansi_0.5.0 magrittr_2.0.1 checkmate_2.0.0
#> [16] tzdb_0.2.0 RcppParallel_5.1.4 matrixStats_0.61.0
#> [19] xts_0.12-0 prettyunits_1.1.1 colorspace_2.0-2
#> [22] rvest_1.0.2 ggdist_3.0.0 haven_2.3.1
#> [25] xfun_0.27 callr_3.7.0 crayon_1.4.2
#> [28] jsonlite_1.7.2 lme4_1.1-26 zoo_1.8-8
#> [31] glue_1.6.2 gtable_0.3.0 emmeans_1.5.0
#> [34] V8_3.2.0 distributional_0.2.2 pkgbuild_1.2.0
#> [37] rstan_2.21.2 abind_1.4-5 scales_1.1.1
#> [40] mvtnorm_1.1-3 DBI_1.1.1 miniUI_0.1.1.1
#> [43] xtable_1.8-4 clisymbols_1.2.0 stats4_4.0.2
#> [46] StanHeaders_2.21.0-7 DT_0.15 htmlwidgets_1.5.1
#> [49] httr_1.4.2 threejs_0.3.3 arrayhelpers_1.1-0
#> [52] posterior_1.1.0 ellipsis_0.3.2 pkgconfig_2.0.3
#> [55] loo_2.3.1 farver_2.1.0 sass_0.3.1
#> [58] dbplyr_2.1.1 utf8_1.2.2 tidyselect_1.1.1
#> [61] rlang_1.0.2 reshape2_1.4.4 later_1.3.0
#> [64] munsell_0.5.0 cellranger_1.1.0 tools_4.0.2
#> [67] cli_3.1.0 generics_0.1.2 broom_0.7.10
#> [70] evaluate_0.14 fastmap_1.1.0 yaml_2.2.1
#> [73] processx_3.5.2 knitr_1.36 fs_1.5.0
#> [76] nlme_3.1-148 mime_0.12 projpred_2.0.2
#> [79] xml2_1.3.2 compiler_4.0.2 shinythemes_1.1.2
#> [82] rstudioapi_0.13 curl_4.3.2 gamm4_0.2-6
#> [85] reprex_2.0.1 statmod_1.4.36 bslib_0.2.4
#> [88] stringi_1.7.5 ps_1.5.0 Brobdingnag_1.2-6
#> [91] lattice_0.20-41 Matrix_1.3-4 nloptr_1.2.2.2
#> [94] markdown_1.1 shinyjs_1.1 tensorA_0.36.2
#> [97] vctrs_0.3.8 pillar_1.6.4 lifecycle_1.0.1
#> [100] jquerylib_0.1.4 bridgesampling_1.0-0 estimability_1.3
#> [103] httpuv_1.5.5 R6_2.5.1 promises_1.1.1
#> [106] gridExtra_2.3 codetools_0.2-16 boot_1.3-25
#> [109] colourpicker_1.0 MASS_7.3-51.6 gtools_3.8.2
#> [112] assertthat_0.2.1 withr_2.4.2 shinystan_2.5.0
#> [115] mgcv_1.8-31 parallel_4.0.2 hms_1.1.1
#> [118] grid_4.0.2 minqa_1.2.4 coda_0.19-4
#> [121] rmarkdown_2.11 shiny_1.6.0 lubridate_1.8.0
#> [124] base64enc_0.1-3 dygraphs_1.1.1.6
Read and add final touches to data
df <- read.csv("data/cleaned/catch_BT_FM_1987-2003.csv")
# How many nets in total? (For scaling with effort later)
df %>%
group_by(netID) %>%
mutate(n = n()) %>%
ggplot(., aes(netID2, n)) +
geom_col()
#> group_by: one grouping variable (netID)
#> mutate (grouped): new variable 'n' (integer) with 185 unique values and 0% NA

df <- df %>% group_by(Area, year) %>% mutate(n_nets_year = length(unique(netID2))) %>% ungroup()
#> group_by: 2 grouping variables (Area, year)
#> mutate (grouped): new variable 'n_nets_year' (integer) with 10 unique values and 0% NA
#> ungroup: no grouping variables
# Test: the number of unique nets in one area and year (should equal n_nets_year for that combination)
df %>% filter(year == 1995 & Area == "BT") %>% distinct(netID2)
#> filter: removed 53,506 rows (99%), 524 rows remaining
#> distinct: removed 495 rows (94%), 29 rows remaining
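# Optional broader check (a sketch): list the n_nets_year value computed for every area and year
df %>%
distinct(Area, year, n_nets_year) %>%
arrange(Area, year)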
# Now we need to find representative catch sizes
# Plot n per length group (cm)
p1 <- ggplot(df, aes(factor(length_group))) +
geom_histogram(stat = "count") +
facet_wrap(~Area, scales = "free") + # Note the differences in # data
theme_classic() +
coord_cartesian(expand = 0)
#> Warning: Ignoring unknown parameters: binwidth, bins, pad
# Keep only fish in length groups larger than 13 cm (drop the smallest, non-representative length groups)
df <- df %>% filter(length_group > 13) %>% as.data.frame()
#> filter: removed 21,128 rows (39%), 32,902 rows remaining
# Plot again
p2 <- ggplot(df, aes(factor(length_group))) +
geom_histogram(stat = "count") +
facet_wrap(~Area, scales = "free") + # Note the differences in # data
theme_classic() +
coord_cartesian(expand = 0)
#> Warning: Ignoring unknown parameters: binwidth, bins, pad
p1/p2

# Before we process the data any further, save it, because we will use it in this format
# for analysing mean size (i.e. we need one row per individual)
size_df <- df
# Now we want to process the data a bit further. Following Edwards' sizeSpectra package,
# we want the data in a format like:
# Year SpecCode LngtClass Number LWa LWb bodyMass Biomass
# and we need catch per unit effort (CPUE) rather than raw counts
# https://htmlpreview.github.io/?https://raw.githubusercontent.com/andrew-edwards/sizeSpectra/master/doc/MEPS_IBTS_MLEbins.html
# Do NOT group by year here: this is the aggregated (all years pooled) analysis,
# so group by Area and length group only and count rows with n()
# (a year-resolved sketch is shown after this chunk for reference)
df2 <- df %>%
group_by(Area, length_group) %>%
summarise(catch_n = n()) %>% # catch_n = the number of individuals (rows) in each length group and area, pooled across years
ungroup() %>%
mutate(effort_id = paste(Area, sep = ".")) %>%
as.data.frame()
#> group_by: 2 grouping variables (Area, length_group)
#> summarise: now 26 rows and 3 columns, one group variable remaining (Area)
#> ungroup: no grouping variables
#> mutate: new variable 'effort_id' (character) with 2 unique values and 0% NA
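# For reference: a year-resolved sketch of the same aggregation (not used in this all-years analysis)
# would also group by year, e.g.:
df2_yearly <- df %>%
group_by(Area, year, length_group) %>%
summarise(catch_n = n()) %>%
ungroup() %>%
mutate(effort_id = paste(Area, year, sep = "."))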
# Now we need to get the effort back in there
df_effort <- df %>%
mutate(effort_id = paste(Area, sep = ".")) %>%
select(effort_id, n_nets_year) %>%
distinct(effort_id, .keep_all = TRUE) %>%
as.data.frame()
#> mutate: new variable 'effort_id' (character) with 2 unique values and 0% NA
#> select: dropped 10 variables (Area, Sektion, Station, year, week, …)
#> distinct: removed 32,900 rows (>99%), 2 rows remaining
# Now do a left_join
df3 <- left_join(df2, df_effort, by = "effort_id") %>% as.data.frame()
#> left_join: added one column (n_nets_year)
#> > rows only in x 0
#> > rows only in y ( 0)
#> > matched rows 26
#> > ====
#> > rows total 26
# Now do some additional calculations (weights etc.) to match the data structure in the sizeSpectra package
# Length-weight (LW) parameters from FishBase (2020.09.24)
a <- 0.01
b <- 3.08
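# Quick sanity check of the length-weight relationship W = a * L^b
# (a sketch using a hypothetical 14 cm fish, not part of the pipeline below)
a * 14^b # roughly 34 g with a = 0.01 and b = 3.08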
df4 <- df3 %>%
ungroup() %>%
rename("area" = "Area",
"min_length_group_cm" = "length_group") %>% # What we have now is the minimum length in each class
mutate(max_length_group_cm = min_length_group_cm + 2.4, # Get max length in bin (2.5 cm length-classes)
wmin = a*min_length_group_cm^b, # Get min mass in bin
wmax = a*max_length_group_cm^b) %>% # Get max mass in bin
mutate(cpue_numbers = catch_n/n_nets_year, # Get numbers CPUE: divide by the previously created n_nets_year, i.e. the number of unique net IDs in each area and year
cpue_biom = (catch_n*((wmin + wmax)/2))/n_nets_year) %>% # Get biomass CPUE, use mean of mass in size range
mutate(SpecCode = "Perch")
#> ungroup: no grouping variables
#> rename: renamed 2 variables (area, min_length_group_cm)
#> mutate: new variable 'max_length_group_cm' (double) with 13 unique values and 0% NA
#> new variable 'wmin' (double) with 13 unique values and 0% NA
#> new variable 'wmax' (double) with 13 unique values and 0% NA
#> mutate: new variable 'cpue_numbers' (double) with 26 unique values and 0% NA
#> new variable 'cpue_biom' (double) with 26 unique values and 0% NA
#> mutate: new variable 'SpecCode' (character) with one unique value and 0% NA
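# Worked example of the CPUE calculation above, with hypothetical numbers (not taken from the data):
# 100 fish in a size class spanning 30-40 g, caught with 25 nets
100 / 25 # cpue_numbers: 4 fish per net
(100 * (30 + 40) / 2) / 25 # cpue_biom: 140 g per net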
# Check that there is one unique row per size class and area
df4 %>%
group_by(area, min_length_group_cm) %>%
summarise(n = n()) %>%
filter(n == 1) %>%
as.data.frame()
#> group_by: 2 grouping variables (area, min_length_group_cm)
#> summarise: now 26 rows and 3 columns, one group variable remaining (area)
#> filter (grouped): no rows removed
# Plot the log biomass cpue as function of log weight
ggplot(df4, aes(log(wmax), log(cpue_biom))) +
stat_smooth(se = FALSE) +
geom_point() +
facet_wrap(~ area) +
scale_color_viridis(discrete = TRUE) +
coord_cartesian(expand = 0) +
theme(aspect.ratio = 1)
#> `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Plot the total catch (number of individuals) by length group in the processed data
# Compare with p2; they should show the same distributions
ggplot(df4, aes(min_length_group_cm, catch_n)) +
geom_bar(stat = "identity") +
facet_wrap(~ area, scales = "free") +
theme_classic() +
coord_cartesian(expand = 0)

p2

# Now that we have CPUE rather than raw counts, we can check whether the lower size cut-off
# is appropriate
p3 <- ggplot(df4, aes(factor(round(wmin, digits = 3)), cpue_biom)) +
geom_bar(stat = "identity") +
facet_wrap(~ area, scales = "free") +
theme_classic() +
coord_cartesian(expand = 0)
# Keep only size classes with a minimum body mass (wmin) above 50 g
df4 <- df4 %>% filter(wmin > 50) %>% as.data.frame()
#> filter: removed 2 rows (8%), 24 rows remaining
# Plot again
p4 <- ggplot(df4, aes(factor(round(wmin, digits = 3)), cpue_biom)) +
geom_bar(stat = "identity") +
facet_wrap(~ area, scales = "free") +
theme_classic() +
coord_cartesian(expand = 0)
p3/p4

Save data
write.csv(df4, "data/for_fitting/size_spectrum_dat.csv")