# Load libraries

library(tidyverse); theme_set(theme_classic(base_size = 12))
library(nlstools)
library(viridis)
library(tidylog)
library(RColorBrewer)
library(patchwork)

# Print the R version, platform and the versions of all attached/loaded
# packages, so the environment used for this run is recorded with the output
sessionInfo() 
#> R version 4.0.2 (2020-06-22)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS  10.16
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
#> 
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#>  [1] patchwork_1.1.1    RColorBrewer_1.1-2 tidylog_1.0.2      viridis_0.5.1     
#>  [5] viridisLite_0.4.0  nlstools_1.0-2     forcats_0.5.1      stringr_1.4.0     
#>  [9] dplyr_1.0.7        purrr_0.3.4        readr_2.1.1        tidyr_1.1.4       
#> [13] tibble_3.1.5       ggplot2_3.3.5      tidyverse_1.3.1   
#> 
#> loaded via a namespace (and not attached):
#>  [1] Rcpp_1.0.8       lubridate_1.8.0  clisymbols_1.2.0 assertthat_0.2.1
#>  [5] digest_0.6.28    utf8_1.2.2       R6_2.5.1         cellranger_1.1.0
#>  [9] backports_1.3.0  reprex_2.0.1     evaluate_0.14    httr_1.4.2      
#> [13] pillar_1.6.4     rlang_1.0.2      readxl_1.3.1     rstudioapi_0.13 
#> [17] jquerylib_0.1.4  rmarkdown_2.11   munsell_0.5.0    broom_0.7.10    
#> [21] compiler_4.0.2   modelr_0.1.8     xfun_0.27        pkgconfig_2.0.3 
#> [25] htmltools_0.5.2  tidyselect_1.1.1 gridExtra_2.3    fansi_0.5.0     
#> [29] crayon_1.4.2     tzdb_0.2.0       dbplyr_2.1.1     withr_2.4.2     
#> [33] grid_4.0.2       jsonlite_1.7.2   gtable_0.3.0     lifecycle_1.0.1 
#> [37] DBI_1.1.1        magrittr_2.0.1   scales_1.1.1     cli_3.1.0       
#> [41] stringi_1.7.5    fs_1.5.0         xml2_1.3.2       bslib_0.2.4     
#> [45] ellipsis_0.3.2   generics_0.1.2   vctrs_0.3.8      tools_4.0.2     
#> [49] glue_1.6.2       hms_1.1.1        fastmap_1.1.0    yaml_2.2.1      
#> [53] colorspace_2.0-2 rvest_1.0.2      knitr_1.36       haven_2.3.1     
#> [57] sass_0.3.1

# Read and add final touches to data

# Import the cleaned size-at-age records (semicolon-delimited file)
df <- read.csv(
  file = "data/cleaned/size_at_age_BT_FM_1970-2004.csv",
  sep = ";"
)

# Build an area-specific identifier by joining ID and area with a dot
df[["ID2"]] <- paste(df$ID, df$area, sep = ".")

# Restrict the years to match the rest of the data sets and analyses:
# birth_year strictly between 1980 and 1998, catch_year before 2004, and
# only rows where the age at catch equals the recorded age
df <- df %>%
  filter(
    birth_year > 1980,
    birth_year < 1998,
    catch_year < 2004,
    catch_age == age
  )
#> filter: removed 44,959 rows (86%), 7,476 rows remaining

# Overview plot: length against age, coloured by area, one panel per
# birth year
p1 <- ggplot(data = df, mapping = aes(x = age, y = length, colour = area)) +
  geom_point() +
  facet_wrap(facets = ~ birth_year)
p1


# Reshape the data so that area is also available as dummy variables.
# The stacked result (dfm) is the data frame used for the models.
bt <- df %>% filter(area == "BT")
#> filter: removed 4,704 rows (63%), 2,772 rows remaining
fm <- df %>% filter(area == "FM")
#> filter: removed 2,772 rows (37%), 4,704 rows remaining

# BT rows get areaW = 1 / areaC = 0, FM rows get areaW = 0 / areaC = 1;
# the BT rows are stacked on top of the FM rows
dfm <- data.frame(
  rbind(
    cbind(bt, areaW = 1, areaC = 0),
    cbind(fm, areaW = 0, areaC = 1)
  )
)

# Make sure age is stored as a numeric column
dfm[["age"]] <- as.numeric(dfm[["age"]])

# Quick look at a single cohort (birth year 1988), coloured by area
dfm %>%
  filter(birth_year == 1988) %>%
  ggplot(aes(x = age, y = length, colour = area)) +
  geom_point()
#> filter: removed 6,900 rows (92%), 576 rows remaining


# The long FM fish from the 1988 cohort (length > 310) seem to come from
# a different gear: print those rows, then plot all FM data by gear
dfm %>%
  filter(
    area == "FM",
    birth_year == 1988,
    length > 310
  )
#> filter: removed 7,465 rows (>99%), 11 rows remaining
dfm %>%
  filter(area == "FM") %>%
  ggplot(aes(x = age, y = length, colour = factor(gear))) +
  geom_point(size = 0.4, alpha = 0.5) +
  facet_wrap(facets = ~ birth_year)
#> filter: removed 2,772 rows (37%), 4,704 rows remaining


# Drop the odd gear (code 32); list the gear codes before and after to
# confirm that only that code disappears
dfm$gear %>% unique() %>% sort()
#> [1]  2.00  9.00  9.37 15.00 32.00 64.00
dfm <- dfm %>%
  filter(gear != 32)
#> filter: removed 42 rows (1%), 7,434 rows remaining
dfm$gear %>% unique() %>% sort()
#> [1]  2.00  9.00  9.37 15.00 64.00

# FM after removing the odd gear: now it looks better
dfm %>%
  filter(area == "FM") %>%
  ggplot(aes(x = age, y = length, colour = factor(gear))) +
  geom_point(size = 0.4, alpha = 0.5) +
  facet_wrap(facets = ~ birth_year)
#> filter: removed 2,772 rows (37%), 4,662 rows remaining


# Same plot for BT, where more or less only gear 9 is used
dfm %>%
  filter(area == "BT") %>%
  ggplot(aes(x = age, y = length, colour = factor(gear))) +
  geom_point(size = 0.4, alpha = 0.5) +
  facet_wrap(facets = ~ birth_year)
#> filter: removed 4,662 rows (63%), 2,772 rows remaining


# The model is fitted to lengths in cm rather than mm, so add the
# converted length and its natural log as new columns
dfm[["length_cm"]] <- dfm[["length"]] / 10
dfm[["log_length_cm"]] <- log(dfm[["length_cm"]])

# Store age as an integer column
dfm[["age"]] <- as.integer(dfm[["age"]])

# Print the smallest age and the earliest birth year left in the data
min(dfm[["age"]])
#> [1] 1
min(dfm[["birth_year"]])
#> [1] 1981

# Save data

# Write the prepared data frame to the file used for model fitting.
# row.names is left at its default, so row names are written as well.
write.csv(
  x = dfm,
  file = "data/for_fitting/vbge_dat.csv"
)