# Read and add final touches to data
# Load the cleaned size-at-age data (semicolon-separated file)
df <- read.csv(
  file = "data/cleaned/size_at_age_BT_FM_1970-2004.csv",
  sep = ";"
)

# Build an area-specific ID by joining ID and area with a dot
df[["ID2"]] <- paste(df[["ID"]], df[["area"]], sep = ".")

# Keep only the birth years and catch years that match the rest of the data
# sets and analyses, and only rows where catch_age equals age. Conditions
# separated by commas inside filter() are combined with a logical AND.
df <- filter(
  df,
  birth_year > 1980,
  birth_year < 1998,
  catch_year < 2004,
  catch_age == age
)
#> filter: removed 44,959 rows (86%), 7,476 rows remaining
# Overview plot: length against age, coloured by area, one panel per
# birth year
p1 <- ggplot(
  data = df,
  mapping = aes(x = age, y = length, colour = area)
) +
  geom_point() +
  facet_wrap(facets = ~birth_year)
p1

# Reshape the data a little: split it by area, then stack the two subsets
# again with 0/1 dummy variables for area (areaW = 1 on BT rows, areaC = 1 on
# FM rows). The stacked data frame, dfm, is the one used for the models.
bt <- df %>% filter(area == "BT")
#> filter: removed 4,704 rows (63%), 2,772 rows remaining
fm <- df %>% filter(area == "FM")
#> filter: removed 2,772 rows (37%), 4,704 rows remaining
dfm <- data.frame(
  rbind(
    cbind(bt, areaW = 1, areaC = 0),
    cbind(fm, areaW = 0, areaC = 1)
  )
)
dfm[["age"]] <- as.numeric(dfm[["age"]])

# Look at a single cohort (birth year 1988), coloured by area
dfm %>%
  filter(birth_year == 1988) %>%
  ggplot(mapping = aes(x = age, y = length, colour = area)) +
  geom_point()
#> filter: removed 6,900 rows (92%), 576 rows remaining

# Inspect the FM fish born in 1988 with length > 310: they seem to come from
# a different gear. List them, then plot all FM observations coloured by gear.
dfm %>% filter(area == "FM", birth_year == 1988, length > 310)
#> filter: removed 7,465 rows (>99%), 11 rows remaining
dfm %>%
  filter(area == "FM") %>%
  ggplot(mapping = aes(x = age, y = length, colour = factor(gear))) +
  geom_point(size = 0.4, alpha = 0.5) +
  facet_wrap(facets = ~birth_year)
#> filter: removed 2,772 rows (37%), 4,704 rows remaining

# Remove the odd gear (gear 32); list the gear codes before and after
sort(unique(dfm[["gear"]]))
#> [1] 2.00 9.00 9.37 15.00 32.00 64.00
dfm <- filter(dfm, gear != 32)
#> filter: removed 42 rows (1%), 7,434 rows remaining
sort(unique(dfm[["gear"]]))
#> [1] 2.00 9.00 9.37 15.00 64.00

# Re-draw the FM plot: it looks better now that gear 32 is gone
dfm %>%
  filter(area == "FM") %>%
  ggplot(mapping = aes(x = age, y = length, colour = factor(gear))) +
  geom_point(size = 0.4, alpha = 0.5) +
  facet_wrap(facets = ~birth_year)
#> filter: removed 2,772 rows (37%), 4,662 rows remaining

# Same plot for BT, where more or less only gear 9 is used
dfm %>%
  filter(area == "BT") %>%
  ggplot(mapping = aes(x = age, y = length, colour = factor(gear))) +
  geom_point(size = 0.4, alpha = 0.5) +
  facet_wrap(facets = ~birth_year)
#> filter: removed 4,662 rows (63%), 2,772 rows remaining

# Lastly, express length in cm rather than mm for model fitting, and store
# the natural log of the length in cm as well
dfm[["length_cm"]] <- dfm[["length"]] / 10
dfm[["log_length_cm"]] <- log(dfm[["length_cm"]])

# Store age as an integer
dfm[["age"]] <- as.integer(dfm[["age"]])

# Check the smallest age and the earliest birth year left in the data
min(dfm[["age"]])
#> [1] 1
min(dfm[["birth_year"]])
#> [1] 1981