# hhSAR prep data

# Load the relevant files, do any transformations needed, and
# filter rows by serial interval. The thresholds are set per scenario in
# min_SIs (originally described as omitting negative serial intervals).

# Read the raw sequencing output spreadsheet.
dat_raw <- read_excel("data/REVISEDSEQUENCINGOutput.xlsx")

# Abort early when the spreadsheet came back with no rows.
if (nrow(dat_raw) < 1L) {
    stop("Data frame is empty. Please contact authors if you wish to proceed")
}

# IDs handed to clean_data() as NickID_to_exclude.
individuals_to_exclude <- c("CAC0337/PK2", "VEC7696/RK2")

# IDs handed to clean_data() as HOCONUMBER_to_exclude.
households_to_exclude <- c("CAC0015", "VEC006", "VEC11001")

# Clean the raw data, passing both exclusion lists by name.
dat_clean <- clean_data(
    dat_raw,
    NickID_to_exclude     = individuals_to_exclude,
    HOCONUMBER_to_exclude = households_to_exclude
)

# Serial intervals derived from the cleaned data.
dat_SI <- calculate_serial_intervals(dat_clean)

# Analysis scenarios. Each list has one entry per scenario; the lists are
# combined element by element with map2() later in this script.

# Serial-interval threshold passed to drop_long_SIs() for each scenario.
# NOTE(review): -Inf presumably means "no lower cut-off" -- confirm against
# drop_long_SIs().
min_SIs <- list("Main"        = -2,
                "Sensitivity" = -Inf,
                "Flat priors" = -2)

# Whether reallocate_status() should be applied to each scenario's data.
reallocate <- list("Main"        = FALSE,
                   "Sensitivity" = FALSE,
                   "Flat priors" = FALSE)

# map2() pairs elements by position and ignores names, so a reordered or
# extended list would silently apply a flag to the wrong scenario. Fail
# loudly if the two scenario lists ever drift apart.
stopifnot(identical(names(min_SIs), names(reallocate)))

# One colour per scenario, named after the scenarios: request one more
# "Blues" colour than there are scenarios, drop the first colour of that
# ramp, and reverse the remainder.
my_palette <- setNames(
    rev(RColorBrewer::brewer.pal(1 + length(min_SIs), "Blues")[-1]),
    nm = names(min_SIs)
)

# Build one data set per scenario by calling drop_long_SIs() with that
# scenario's entry from min_SIs.
dat_to_analyse <- map(
    min_SIs,
    function(min_SI) drop_long_SIs(dat_clean, dat_SI, min_SI)
)

# For scenarios whose reallocate flag is TRUE, pass the data through
# reallocate_status(); otherwise keep it unchanged. (Original note: "do we
# need to reallocate the STATUS column for households?")
dat_to_analyse <- map2(
    dat_to_analyse,
    reallocate,
    function(scenario_dat, do_reallocate) {
        if (do_reallocate) reallocate_status(scenario_dat) else scenario_dat
    }
)

# Vaccination dates, computed separately for each scenario's data set.
vaccines <- map(.x = dat_to_analyse, .f = get_vaccine_dates)

# Swab dates for each scenario (original note: "which swab number are we
# looking at?").
Swabs <- map(.x = dat_to_analyse, .f = get_swab_dates)

# Run the lineage script at this point in the pipeline; keep it here, as it
# may rely on objects created above.
# NOTE(review): presumably this is what defines `Variants`, which is used
# further down but never assigned in this section -- confirm.
source("hhSARmodelLineages.R")

# Pair each scenario's vaccine dates with its swab dates.
vax_and_swabs <- map2(vaccines, Swabs, bind_vaccines_and_Swabs)
# TODO (original note): need to include years before/after 18 years of age

# Join each scenario's vaccine/swab table to the matching element of
# `Variants`. No `by` is supplied, so (assuming dplyr::inner_join) the join
# keys are whatever columns the two tables share.
# NOTE(review): `Variants` is not assigned in this section; presumably it
# comes from the sourced lineage script -- confirm.
vax_swabs_vars <- map2(
    vax_and_swabs,
    Variants,
    function(vax_swab, variant) inner_join(vax_swab, variant)
)

# Recode the variables to numeric for JAGS.
vas_dat <- map(.x = vax_swabs_vars, .f = recode_vars)

# Prediction information for each scenario, built by make_pred().
vas_pred <- map(vas_dat, make_pred)

# Combine data and prediction information into the list format for JAGS.
vas_list <- map2(vas_dat, vas_pred, make_jags_data_list)

# Version of the data that has the swab ID code (ERR).
meta <- read_csv("phylo/hoco_meta.csv")
