For areas separately, using Edwards et al’s MLEbin method
# Load libraries (install first if needed)
library(tidyverse); theme_set(theme_classic(base_size = 12))
#> Warning: package 'tidyr' was built under R version 4.0.5
library(tidylog)
library(RColorBrewer)
#> Warning: package 'RColorBrewer' was built under R version 4.0.5
library(patchwork)
library(sizeSpectra)
library(brms)
#> Warning: package 'Rcpp' was built under R version 4.0.5
library(nlstools)
library(bayesplot)
library(tidylog)
library(tidybayes)
library(RColorBrewer)
library(modelr)
library(viridis)
library(ggridges)
library(grDevices)
# Print package versions for versions
sessionInfo()
#> R version 4.0.2 (2020-06-22)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS 10.16
#>
#> Matrix products: default
#> BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
#>
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] ggridges_0.5.2 viridis_0.5.1 viridisLite_0.4.1
#> [4] modelr_0.1.8 tidybayes_3.0.1 bayesplot_1.7.2
#> [7] nlstools_1.0-2 brms_2.17.0 Rcpp_1.0.8
#> [10] sizeSpectra_1.0.0.0 patchwork_1.1.1 RColorBrewer_1.1-3
#> [13] tidylog_1.0.2 forcats_0.5.1 stringr_1.4.1
#> [16] dplyr_1.0.10 purrr_0.3.4 readr_2.1.1
#> [19] tidyr_1.2.0 tibble_3.1.8 ggplot2_3.3.6
#> [22] tidyverse_1.3.2
#>
#> loaded via a namespace (and not attached):
#> [1] readxl_1.3.1 backports_1.3.0 plyr_1.8.6
#> [4] igraph_1.2.5 svUnit_1.0.6 splines_4.0.2
#> [7] crosstalk_1.1.0.1 TH.data_1.1-1 rstantools_2.1.1
#> [10] inline_0.3.15 digest_0.6.30 htmltools_0.5.3
#> [13] rsconnect_0.8.16 fansi_1.0.3 magrittr_2.0.3
#> [16] checkmate_2.0.0 googlesheets4_1.0.0 tzdb_0.2.0
#> [19] RcppParallel_5.1.4 matrixStats_0.61.0 xts_0.12-0
#> [22] sandwich_3.0-2 prettyunits_1.1.1 colorspace_2.0-3
#> [25] rvest_1.0.3 ggdist_3.0.0 haven_2.5.1
#> [28] xfun_0.33 callr_3.7.2 crayon_1.4.2
#> [31] jsonlite_1.8.0 lme4_1.1-26 survival_3.1-12
#> [34] zoo_1.8-8 glue_1.6.2 gtable_0.3.1
#> [37] gargle_1.2.0 emmeans_1.5.0 V8_3.2.0
#> [40] distributional_0.2.2 pkgbuild_1.3.1 rstan_2.21.2
#> [43] abind_1.4-5 scales_1.2.1 mvtnorm_1.1-3
#> [46] DBI_1.1.1 miniUI_0.1.1.1 xtable_1.8-4
#> [49] clisymbols_1.2.0 stats4_4.0.2 StanHeaders_2.21.0-7
#> [52] DT_0.15 htmlwidgets_1.5.4 httr_1.4.4
#> [55] threejs_0.3.3 arrayhelpers_1.1-0 posterior_1.1.0
#> [58] ellipsis_0.3.2 pkgconfig_2.0.3 loo_2.3.1
#> [61] farver_2.1.1 sass_0.4.2 dbplyr_2.1.1
#> [64] utf8_1.2.2 tidyselect_1.1.2 rlang_1.0.6
#> [67] reshape2_1.4.4 later_1.3.0 munsell_0.5.0
#> [70] cellranger_1.1.0 tools_4.0.2 cachem_1.0.6
#> [73] cli_3.4.1 generics_0.1.2 broom_1.0.1
#> [76] evaluate_0.16 fastmap_1.1.0 yaml_2.3.5
#> [79] processx_3.7.0 knitr_1.40 fs_1.5.2
#> [82] nlme_3.1-148 projpred_2.0.2 mime_0.12
#> [85] xml2_1.3.3 compiler_4.0.2 shinythemes_1.1.2
#> [88] rstudioapi_0.14 gamm4_0.2-6 curl_4.3.2
#> [91] reprex_2.0.1 statmod_1.4.36 bslib_0.4.0
#> [94] stringi_1.7.8 ps_1.7.1 Brobdingnag_1.2-6
#> [97] lattice_0.20-41 Matrix_1.3-4 nloptr_1.2.2.2
#> [100] markdown_1.1 shinyjs_1.1 tensorA_0.36.2
#> [103] vctrs_0.5.0 pillar_1.8.1 lifecycle_1.0.3
#> [106] jquerylib_0.1.4 bridgesampling_1.0-0 estimability_1.3
#> [109] httpuv_1.5.5 R6_2.5.1 promises_1.1.1
#> [112] gridExtra_2.3 codetools_0.2-16 boot_1.3-25
#> [115] MASS_7.3-51.6 colourpicker_1.0 gtools_3.8.2
#> [118] assertthat_0.2.1 withr_2.5.0 shinystan_2.5.0
#> [121] multcomp_1.4-20 mgcv_1.8-31 parallel_4.0.2
#> [124] hms_1.1.1 grid_4.0.2 minqa_1.2.4
#> [127] coda_0.19-4 rmarkdown_2.16 googledrive_2.0.0
#> [130] shiny_1.6.0 lubridate_1.8.0 base64enc_0.1-3
#> [133] dygraphs_1.1.1.6
# For parallel processing
options(mc.cores = parallel::detectCores())
# Load cache
# qwraps2::lazyload_cache_dir(path = "R/analysis/04_size_spectra_fit_cache/html")
d <- read.csv("data/for_fitting/size_spectrum_dat.csv")
# This is for the size_df below (1 row 1 ind, for calculating mean size later)
df <- read.csv("data/cleaned/catch_BT_FM_1987-2003.csv") %>% mutate(area = Area)
#> mutate: new variable 'area' (character) with 2 unique values and 0% NA
# How many nets in total? (For scaling with effort later)
df %>%
group_by(netID) %>%
mutate(n = n()) %>%
ggplot(aes(n)) +
geom_histogram()
#> group_by: one grouping variable (netID)
#> mutate (grouped): new variable 'n' (integer) with 185 unique values and 0% NA
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
df <- df %>% group_by(Area, year) %>% mutate(n_nets_year = length(unique(netID2))) %>% ungroup()
#> group_by: 2 grouping variables (Area, year)
#> mutate (grouped): new variable 'n_nets_year' (integer) with 10 unique values and 0% NA
#> ungroup: no grouping variables
# Test
df %>% filter(year == 1995 & Area == "BT") %>% distinct(netID2)
#> filter: removed 53,506 rows (99%), 524 rows remaining
#> distinct: removed 495 rows (94%), 29 rows remaining
# Now we need to find representative catch sizes
# Plot n per length group (cm)
p1 <- ggplot(df, aes(factor(length_group))) +
geom_histogram(stat = "count") +
facet_wrap(~Area, scales = "free") + # Note the differences in # data
theme_classic() +
coord_cartesian(expand = 0)
#> Warning: Ignoring unknown parameters: binwidth, bins, pad
# Filter out fish > 13 cm
df <- df %>% filter(length_group > 13) %>% as.data.frame()
#> filter: removed 21,128 rows (39%), 32,902 rows remaining
# Plot again
p2 <- ggplot(df, aes(factor(length_group))) +
geom_histogram(stat = "count") +
facet_wrap(~Area, scales = "free") + # Note the differences in # data
theme_classic() +
coord_cartesian(expand = 0)
#> Warning: Ignoring unknown parameters: binwidth, bins, pad
p1/p2
# Before we process data any further, save it because we will use it in this format
# for analysing mean size (i.e. we need 1 row = 1 ind)
size_df <- df
colnames(size_df)
#> [1] "Area" "Sektion" "Station" "year" "week"
#> [6] "day" "Längdgr_std" "length_group" "netID" "netID2"
#> [11] "area" "n_nets_year"
dataBintest <- d %>% rename("Number" = "cpue_numbers")
#> rename: renamed one variable (Number)
# Following and modifying this vignette:
# https://htmlpreview.github.io/?https://raw.githubusercontent.com/andrew-edwards/sizeSpectra/master/doc/MEPS_IBTS_MLEbins.html
# Forsmark
dataBin_FM <- dataBintest %>%
filter(area == "FM") %>%
select(SpecCode, wmin, wmax, Number)
#> filter: removed 12 rows (50%), 12 rows remaining
#> select: dropped 8 variables (X, area, min_length_group_cm, catch_n, effort_id, …)
n_FM = sum(dataBin_FM$Number)
xmin_FM = min(dataBin_FM$wmin)
xmax_FM = max(dataBin_FM$wmax)
FM_spectra = calcLike(negLL.fn = negLL.PLB.bins.species,
p = -1.5,
suppress.warnings = FALSE,
vecDiff = 0.5, # Default is 0.5, I get warning though
dataBinForLike = dataBin_FM,
n = n_FM,
xmin = xmin_FM,
xmax = xmax_FM)
#> Warning in nlm(f = negLL.fn, p = p, ...): NA/Inf replaced by maximum positive
#> value
FM_spectra_df = data.frame(Year = 2000,
xmin = xmin_FM,
xmax = xmax_FM,
n = n_FM,
b = FM_spectra$MLE,
confMin = FM_spectra$conf[1],
confMax = FM_spectra$conf[2])
# Biotest
dataBin_BT <- dataBintest %>%
filter(area == "BT") %>%
select(SpecCode, wmin, wmax, Number)
#> filter: removed 12 rows (50%), 12 rows remaining
#> select: dropped 8 variables (X, area, min_length_group_cm, catch_n, effort_id, …)
n_BT = sum(dataBin_BT$Number)
xmin_BT = min(dataBin_BT$wmin)
xmax_BT = max(dataBin_BT$wmax)
BT_spectra = calcLike(negLL.fn = negLL.PLB.bins.species,
p = -1.5,
suppress.warnings = FALSE,
vecDiff = 0.5, # Default is 0.5, I get warning though
dataBinForLike = dataBin_BT,
n = n_BT,
xmin = xmin_BT,
xmax = xmax_BT)
BT_spectra_df = data.frame(Year = 2000,
xmin = xmin_BT,
xmax = xmax_BT,
n = n_BT,
b = BT_spectra$MLE,
confMin = BT_spectra$conf[1],
confMax = BT_spectra$conf[2])
FM_spectra_df
BT_spectra_df
#** Loop and extract data for plotting =============================================
# https://htmlpreview.github.io/?https://raw.githubusercontent.com/andrew-edwards/sizeSpectra/master/doc/MEPS_IBTS_recommend.html
# These calculations are to get the required input for the recommended plot
# (see ?sizeSpectra::ISD_bin_plot for the structure). This could maybe be functionalised
# like the plotting function ISD_bin_plot().
# Forsmark ==========================================================================
dataBin_FM$Year <- 2000
dataRecommend.isd_FM = dplyr::select(dataBin_FM, Year, wmin, wmax, Number)
data.year.fm <- dplyr::arrange(dataRecommend.isd_FM, desc(wmin))
sumNumber = sum(data.year.fm$Number)
wmin.vec = data.year.fm$wmin
wmax.vec = data.year.fm$wmax
num.vec = data.year.fm$Number
countGTEwmin = rep(NA, length(num.vec)) # to do a manual count
lowCount = countGTEwmin
highCount = countGTEwmin
for(iii in 1:length(countGTEwmin))
{
countGTEwmin[iii] = sum( (wmin.vec >= wmin.vec[iii]) * num.vec)
lowCount[iii] = sum( (wmin.vec >= wmax.vec[iii]) * num.vec)
highCount[iii] = sum( (wmax.vec > wmin.vec[iii]) * num.vec)
}
data.year.fm = cbind(data.year.fm,
"countGTEwmin" = countGTEwmin,
"lowCount" = lowCount,
"highCount" = highCount)
data.year.fm = dplyr::tbl_df(data.year.fm)
#> Warning: `tbl_df()` was deprecated in dplyr 1.0.0.
#> ℹ Please use `tibble::as_tibble()` instead.
xlim.global = c(min(data.year.fm$wmin),
max(data.year.fm$wmax)) # x-axis limits to be common for all plots
ISD_bin_plot(data.year = data.year.fm,
b.MLE = FM_spectra_df$b,
b.confMin = FM_spectra_df$confMin,
b.confMax = FM_spectra_df$confMax,
year = 2000,
xlim = xlim.global,
xmin = FM_spectra_df$xmin,
xmax = FM_spectra_df$xmax
)
# Biotest ==========================================================================
dataBin_BT$Year <- 2000
dataRecommend.isd_BT = dplyr::select(dataBin_BT, Year, wmin, wmax, Number)
data.year.bt <- dplyr::arrange(dataRecommend.isd_BT, desc(wmin))
sumNumber = sum(data.year.bt$Number)
wmin.vec = data.year.bt$wmin
wmax.vec = data.year.bt$wmax
num.vec = data.year.bt$Number
countGTEwmin = rep(NA, length(num.vec)) # to do a manual count
lowCount = countGTEwmin
highCount = countGTEwmin
for(iii in 1:length(countGTEwmin))
{
countGTEwmin[iii] = sum( (wmin.vec >= wmin.vec[iii]) * num.vec)
lowCount[iii] = sum( (wmin.vec >= wmax.vec[iii]) * num.vec)
highCount[iii] = sum( (wmax.vec > wmin.vec[iii]) * num.vec)
}
data.year.bt = cbind(data.year.bt,
"countGTEwmin" = countGTEwmin,
"lowCount" = lowCount,
"highCount" = highCount)
data.year.bt = dplyr::tbl_df(data.year.bt)
xlim.global = c(min(data.year.bt$wmin),
max(data.year.bt$wmax)) # x-axis limits to be common for all plots
ISD_bin_plot(data.year = data.year.bt,
b.MLE = BT_spectra_df$b,
b.confMin = BT_spectra_df$confMin,
b.confMax = BT_spectra_df$confMax,
year = 2000,
xlim = xlim.global,
xmin = BT_spectra_df$xmin,
xmax = BT_spectra_df$xmax
)
pal <- rev(brewer.pal(n = 6, name = "Paired")[c(2, 6)])
# https://htmlpreview.github.io/?https://raw.githubusercontent.com/andrew-edwards/sizeSpectra/master/doc/MEPS_IBTS_MLEbins.html
FM_spectra = dplyr::tbl_df(FM_spectra_df)
FM_spectra = dplyr::mutate(FM_spectra, stdErr = (abs(confMin-b) + abs(confMax-b))/(2*1.96) )
FM_spectra = dplyr::mutate(FM_spectra, area = "FM")
BT_spectra = dplyr::tbl_df(BT_spectra_df)
BT_spectra = dplyr::mutate(BT_spectra, stdErr = (abs(confMin-b) + abs(confMax-b))/(2*1.96) )
BT_spectra = dplyr::mutate(BT_spectra, area = "BT")
spectra <- rbind(BT_spectra, FM_spectra)
# Now make a ggplot-version to put in the paper as an example
# First put in in a dataframe instead of adding them as lines in the base plot
# BT (warm)
data_year_bt <- data.year.bt
data_year_bt$area <- "Warm"
sumNumber = sum(data_year_bt$Number)
xmin = BT_spectra$xmin
xmax = BT_spectra$xmax
x.PLB = seq(xmin_BT, xmax_BT, length = 10001)
b.MLE = BT_spectra$b
b.confMin = BT_spectra$confMin
b.confMax = BT_spectra$confMax
data_year_bt2 <- data.frame(x.PLB = x.PLB,
y.PLB = (1 - pPLB(x = x.PLB, b = b.MLE, xmin = min(x.PLB), xmax = max(x.PLB))) * sumNumber,
y.PLB.confMin = (1 - pPLB(x = x.PLB, b = b.confMin, xmin = min(x.PLB), xmax = max(x.PLB))) * sumNumber,
y.PLB.confMax = (1 - pPLB(x = x.PLB, b = b.confMax, xmin = min(x.PLB), xmax = max(x.PLB))) * sumNumber,
area = "Warm")
# FM (cold)
data_year_fm <- data.year.fm
data_year_fm$area <- "Cold"
sumNumber = sum(data_year_fm$Number)
xmin = FM_spectra$xmin
xmax = FM_spectra$xmax
x.PLB = seq(xmin_FM, xmax_FM, length = 10001)
b.MLE = FM_spectra$b
b.confMin = FM_spectra$confMin
b.confMax = FM_spectra$confMax
data_year_fm2 <- data.frame(x.PLB = x.PLB,
y.PLB = (1 - pPLB(x = x.PLB, b = b.MLE, xmin = min(x.PLB), xmax = max(x.PLB))) * sumNumber,
y.PLB.confMin = (1 - pPLB(x = x.PLB, b = b.confMin, xmin = min(x.PLB), xmax = max(x.PLB))) * sumNumber,
y.PLB.confMax = (1 - pPLB(x = x.PLB, b = b.confMax, xmin = min(x.PLB), xmax = max(x.PLB))) * sumNumber,
area = "Cold")
data_year <- rbind(data_year_bt, data_year_fm)
data_year2 <- rbind(data_year_bt2, data_year_fm2)
b_warm <- round(BT_spectra$b, 2)
b_cold <- round(FM_spectra$b, 2)
b_warm_upr <- round(BT_spectra$confMax, 2)
b_cold_upr <- round(FM_spectra$confMax, 2)
b_warm_lwr <- round(BT_spectra$confMin, 2)
b_cold_lwr <- round(FM_spectra$confMin, 2)
data_year2 <- data_year2 %>%
mutate(area_plot = ifelse(area == "Warm", "Heat", "Ref"))
#> mutate: new variable 'area_plot' (character) with 2 unique values and 0% NA
pmle <- ggplot(data_year2) +
geom_rect(data = data_year,
aes(xmin = wmin, xmax = wmax, ymin = lowCount, ymax = highCount, fill = area),
alpha = 0.2) +
geom_line(aes(x.PLB, y.PLB, color = area_plot), size = 1.3, alpha = 0.8) +
geom_line(aes(x.PLB, y.PLB.confMin, color = area_plot), linetype = 3, alpha = 0.8) +
geom_line(aes(x.PLB, y.PLB.confMax, color = area_plot), linetype = 3, alpha = 0.8) +
scale_color_manual(values = pal, name = "Area") +
scale_fill_manual(values = rev(pal)) +
scale_x_log10() +
scale_y_log10() +
guides(fill = "none") +
coord_cartesian(ylim = c(0.01, 1000)) +
labs(x = "Body mass, x [g]", y = "No. individuals \u2265 x") +
theme(legend.position = c(0.8, 0.9)) +
guides(color = guide_legend(override.aes = list(linetype = 1, size = 2, alpha = 0.3, color = pal))) +
theme(text = element_text(size = 12),
legend.position = c(0.9, 0.9),
legend.spacing.y = unit(0, 'cm'),
legend.key.size = unit(0, "cm"),
legend.title = element_text(size = 10),
legend.text = element_text(size = 8))
pmle
#> Warning: Transformation introduced infinite values in continuous y-axis
#> Transformation introduced infinite values in continuous y-axis
#> Transformation introduced infinite values in continuous y-axis
#> Transformation introduced infinite values in continuous y-axis
# Now plot the CI's
BT_spectra
spec_tot <- rbind(BT_spectra, FM_spectra) %>%
mutate(Area = ifelse(area == "BT", "Heated", "Reference"))
#> mutate: new variable 'Area' (character) with 2 unique values and 0% NA
est_plot <- ggplot(spec_tot, aes(Area, b, color = Area)) +
geom_point(size = 3) +
scale_color_manual(values = pal) +
guides(color = "none") +
labs(y = expression(paste("Size-spectrum exponent ", italic((gamma))))) +
geom_errorbar(aes(x = Area, ymin = confMin, ymax = confMax), width = 0, size = 0.75)
phist <-
size_df %>%
mutate(Area2 = ifelse(Area == "BT", "Warm", "Cold")) %>%
ggplot(., aes(x = length_group, fill = Area2, group = Area2)) +
stat_count(mapping = aes(x = length_group, y = ..prop.., group = Area2),
position = position_dodge(), alpha = 0.8) +
scale_fill_manual(values = rev(pal)) +
theme_light() +
coord_cartesian(expand = 0) +
labs(x = "Length group [cm]", y = "Proportion") +
guides(fill = "none") +
theme(text = element_text(size = 12))
#> mutate: new variable 'Area2' (character) with 2 unique values and 0% NA
# Combine plots
#pmle / (est_plot | phist) + plot_annotation(tag_levels = "A") #+ plot_layout(widths = c(5, 1, 1))
# This is beacuse ggsave is producing an erorr when I save to pdf when using larger or equal-sign
grDevices::cairo_pdf("figures/size_spec.pdf", width = 7.87402, height = 7.87402)
pmle / (est_plot | phist) + plot_annotation(tag_levels = "A")
#> Warning: Transformation introduced infinite values in continuous y-axis
#> Transformation introduced infinite values in continuous y-axis
#> Transformation introduced infinite values in continuous y-axis
#> Transformation introduced infinite values in continuous y-axis
dev.off()
#> quartz_off_screen
#> 2