Create age-length keys and assign ages to catch data

library(tidyverse); theme_set(theme_classic(base_size = 12))
#> ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
#> ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
#> ✓ tibble  3.1.5     ✓ dplyr   1.0.7
#> ✓ tidyr   1.1.4     ✓ stringr 1.4.0
#> ✓ readr   2.1.1     ✓ forcats 0.5.1
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> x dplyr::filter() masks stats::filter()
#> x dplyr::lag()    masks stats::lag()
library(tidylog)
#> 
#> Attaching package: 'tidylog'
#> The following objects are masked from 'package:dplyr':
#> 
#>     add_count, add_tally, anti_join, count, distinct, distinct_all,
#>     distinct_at, distinct_if, filter, filter_all, filter_at, filter_if,
#>     full_join, group_by, group_by_all, group_by_at, group_by_if,
#>     inner_join, left_join, mutate, mutate_all, mutate_at, mutate_if,
#>     relocate, rename, rename_all, rename_at, rename_if, rename_with,
#>     right_join, sample_frac, sample_n, select, select_all, select_at,
#>     select_if, semi_join, slice, slice_head, slice_max, slice_min,
#>     slice_sample, slice_tail, summarise, summarise_all, summarise_at,
#>     summarise_if, summarize, summarize_all, summarize_at, summarize_if,
#>     tally, top_frac, top_n, transmute, transmute_all, transmute_at,
#>     transmute_if, ungroup
#> The following objects are masked from 'package:tidyr':
#> 
#>     drop_na, fill, gather, pivot_longer, pivot_wider, replace_na,
#>     spread, uncount
#> The following object is masked from 'package:stats':
#> 
#>     filter
library(RColorBrewer)
library(patchwork)
library(FSA)
#> ## FSA v0.9.1. See citation('FSA') if used in publication.
#> ## Run fishR() for related website and fishR('IFAR') for related book.
sessionInfo() 
#> R version 4.0.2 (2020-06-22)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS  10.16
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
#> 
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#>  [1] FSA_0.9.1          patchwork_1.1.1    RColorBrewer_1.1-2 tidylog_1.0.2     
#>  [5] forcats_0.5.1      stringr_1.4.0      dplyr_1.0.7        purrr_0.3.4       
#>  [9] readr_2.1.1        tidyr_1.1.4        tibble_3.1.5       ggplot2_3.3.5     
#> [13] tidyverse_1.3.1   
#> 
#> loaded via a namespace (and not attached):
#>  [1] clisymbols_1.2.0 tidyselect_1.1.1 xfun_0.27        bslib_0.2.4     
#>  [5] haven_2.3.1      colorspace_2.0-2 vctrs_0.3.8      generics_0.1.2  
#>  [9] htmltools_0.5.2  yaml_2.2.1       utf8_1.2.2       rlang_1.0.2     
#> [13] jquerylib_0.1.4  pillar_1.6.4     withr_2.4.2      glue_1.6.2      
#> [17] DBI_1.1.1        dbplyr_2.1.1     modelr_0.1.8     readxl_1.3.1    
#> [21] lifecycle_1.0.1  cellranger_1.1.0 munsell_0.5.0    gtable_0.3.0    
#> [25] rvest_1.0.2      evaluate_0.14    knitr_1.36       tzdb_0.2.0      
#> [29] fastmap_1.1.0    fansi_0.5.0      broom_0.7.10     Rcpp_1.0.8      
#> [33] backports_1.3.0  scales_1.1.1     jsonlite_1.7.2   fs_1.5.0        
#> [37] hms_1.1.1        digest_0.6.28    stringi_1.7.5    grid_4.0.2      
#> [41] cli_3.1.0        tools_4.0.2      magrittr_2.0.1   sass_0.3.1      
#> [45] crayon_1.4.2     pkgconfig_2.0.3  ellipsis_0.3.2   xml2_1.3.2      
#> [49] reprex_2.0.1     lubridate_1.8.0  assertthat_0.2.1 rmarkdown_2.11  
#> [53] httr_1.4.2       rstudioapi_0.13  R6_2.5.1         compiler_4.0.2

Read data

length_at_age <- read.csv("data/size_at_age_BT_FM_1970-2004.csv", sep = ";")

# Remove gear 32, see vbge script
length_at_age <- length_at_age %>% filter(!gear == 32)
#> filter: removed 292 rows (1%), 52,143 rows remaining

# Filter years I have catch data for
length_at_age <- length_at_age %>% filter(catch_year > 1986 & catch_year < 2004)
#> filter: removed 21,612 rows (41%), 30,531 rows remaining

# I need to make the ID completely unique (since I should include ID information in my
# mixed model), otherwise they may have the same ID in different areas.
# We solve this by writing ID with area + ID. The paste function pastes things together
length_at_age$ID <- paste(length_at_age$ID, length_at_age$area, sep = "")

# Check catch age corresponds to number of read length-at-ages!
length_at_age <- length_at_age %>%
  group_by(ID) %>%
  mutate(n = n()) %>%
  mutate(length = length/10) %>%
  rename("back_calc_age" = "age") %>% 
  ungroup()
#> group_by: one grouping variable (ID)
#> mutate (grouped): new variable 'n' (integer) with 9 unique values and 0% NA
#> mutate (grouped): changed 30,531 values (100%) of 'length' (0 new NA)
#> rename: renamed one variable (back_calc_age)
#> ungroup: no grouping variables

ggplot(length_at_age, aes(factor(catch_age), factor(n))) + geom_point()


# Remove individuals with the same ID (bug)
length_at_age <- length_at_age %>% filter(n <= catch_age)
#> filter: removed 3 rows (<1%), 30,528 rows remaining

# Now every individual has the same number of rows as it's age in years
ggplot(length_at_age, aes(factor(catch_age), factor(n))) + geom_point()


# Plot size distributions in sample
length_at_age %>%
  filter(area == "FM") %>% 
  ggplot(., aes(length)) +
  geom_histogram() +
  facet_wrap(~ catch_year) +
  theme_classic()
#> filter: removed 8,938 rows (29%), 21,590 rows remaining
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.


length_at_age %>%
  filter(area == "BT") %>% 
  ggplot(., aes(length)) +
  geom_histogram() +
  facet_wrap(~ catch_year) +
  theme_classic()
#> filter: removed 21,590 rows (71%), 8,938 rows remaining
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.


# Now we need to use the same length categories as in the catch data
catch_data <- read.csv("data/catch_BT_FM_1987-2003.csv")

length_at_age <- length_at_age %>% 
  filter(length > 5.1) %>% # This is the smallest category in the catch data
  mutate(len_cat = lencat(length, w = 0.5, startcat = 5.1))
#> filter: removed 112 rows (<1%), 30,416 rows remaining
#> mutate: new variable 'len_cat' (double) with 80 unique values and 0% NA
  
len_cats <- sort(unique(length_at_age$len_cat))

ggplot(length_at_age, aes(len_cat, length)) + geom_point()

Create area-specific age-length keys

Once the length category variable has been added to the age sample data frame, table() is used to construct the summary contingency table of numbers of fish in each combined length and age category.

The row variable (length category) is the first and the column variable (age) is the second argument to this function. The results of table() should be assigned to an object and then submitted as the first argument to prop.table() along with margin=1 as a second argument to construct a row-proportions table. The resulting row-proportions table is the actual age-length key determined from the age sample and is ready to be applied to the length sample.

bt_temp_df <- length_at_age %>% filter(area == "BT")
#> filter: removed 21,479 rows (71%), 8,937 rows remaining
fm_temp_df <- length_at_age %>% filter(area == "FM")
#> filter: removed 8,937 rows (29%), 21,479 rows remaining
  
# Create age-length key
bt_alk_raw <- xtabs(~len_cat + back_calc_age, data = bt_temp_df)
fm_alk_raw <- xtabs(~len_cat + back_calc_age, data = fm_temp_df)
  
bt_alk <- prop.table(bt_alk_raw, margin = 1)
fm_alk <- prop.table(fm_alk_raw, margin = 1)
  
# Now age the catch data by each area using the area-specific age length key
# area
bt_catch <- catch_data %>% filter(Area == "BT")
#> filter: removed 45,794 rows (85%), 8,236 rows remaining
fm_catch <- catch_data %>% filter(Area == "FM")
#> filter: removed 8,236 rows (15%), 45,794 rows remaining
  
bt_aged_catch <- alkIndivAge(bt_alk, ~length_group, data = bt_catch)
fm_aged_catch <- alkIndivAge(fm_alk, ~length_group, data = fm_catch)
#> Warning: The maximum observed length in the length sample (45.1) is greater
#>  than the largest length category in the age-length key (40.1).
#>  The last length category will be treated as all-inclusive.
  
# Combine into single data frame
aged_catch <- rbind(bt_aged_catch, fm_aged_catch)

Plot

aged_catch %>% 
  mutate(area2 = ifelse(Area == "BT", "Warm", "Cold")) %>% 
  ggplot(., aes(length_group, fill = factor(age))) +
  geom_histogram(binwidth = 1, position = "stack") + 
  facet_wrap(~ area2, scales = "free") + 
  scale_fill_brewer(palette = "Set1") + 
  theme(text = element_text(size = 12),
        legend.title = element_text(size = 8),
        legend.text = element_text(size = 8),
        aspect.ratio = 1) + 
  coord_cartesian(expand = 0) + 
  labs(fill = "Age",
       x = "Length group",
       y = "Count")
#> mutate: new variable 'area2' (character) with 2 unique values and 0% NA

  
ggsave("figures/supp/age_by_length.png", width = 6.5, height = 6.5, dpi = 600)

Save

# Save data frame for catch curve analysis
write.csv(aged_catch, "data/cleaned/aged_catch_BT_FM_1987-2003.csv", row.names = FALSE)