# Patient characteristics for drop-out numbers

# Keep only the analyses whose name does not mention 'Flat priors'; every
# table below is built from this reduced list.
dat_to_analyse_ <- dat_to_analyse[!grepl(pattern = 'Flat priors',
                                         x       = names(dat_to_analyse))]

# One row per analysis: the number of distinct HOCONUMBER values it contains.
dat_to_analyse_ %>%
    map_df(.id = 'Analysis',
           .f  = function(d) summarise(d, Number = n_distinct(HOCONUMBER))) %>%
    kable(format = 'simple', caption = "Number of households in analysis")

# Positivity by participant status. An individual counts as positive when at
# least one of their records has the "detected" text in FLCORC.
dat_to_analyse_ %>%
    map_df(.id = 'Analysis',
           .f  = function(d) {
               # collapse the records to one 0/1 flag per individual
               per_person <- d %>%
                   select(HOCONUMBER, STATUS, NickID, FLCORC) %>%
                   mutate(Covid = as.integer(
                       grepl(pattern = "SARS CoV-2 detected in this sample",
                             x       = FLCORC))) %>%
                   group_by(NickID, STATUS) %>%
                   summarise(Covid = max(Covid), .groups = 'drop')

               # positives (n), individuals (N) and proportion (p) per status
               per_person %>%
                   group_by(STATUS) %>%
                   summarise(n = sum(Covid),
                             N = n()) %>%
                   mutate(p = n / N)
           }) %>%
    group_by(Analysis) %>%
    transmute(Status = str_to_title(STATUS),
              value  = sprintf("%s/%s (%0.0f%%)", n, N, 100 * p)) %>%
    spread(Status, value) %>%
    kable(format = 'simple', caption = "Positivity of household members in analysis")


# Sequence availability lookup: one row per NickID, with ERR = 1 when any of
# that individual's rows in `meta` has a non-missing ERR and 0 otherwise.
has_seq <- meta %>%
    transmute(NickID, ERR = !is.na(ERR)) %>%
    group_by(NickID) %>%
    summarise(ERR = max(ERR))


# For each analysis and status: how many individuals have a sequence (Number)
# out of all individuals in that status (Total).
dat_to_analyse_ %>%
    map_df(.id = "Analysis",
           .f  = function(d) {
               d %>%
                   distinct(NickID, STATUS) %>%
                   left_join(has_seq, by = 'NickID') %>%
                   group_by(STATUS) %>%
                   # individuals absent from has_seq have ERR = NA and are
                   # counted as having no sequence
                   summarise(n = sum(ERR, na.rm = TRUE),
                             N = n())
           }) %>%
    transmute(Analysis,
              Status = str_to_title(STATUS),
              Number = n,
              Total  = N) %>%
    kable(format = 'simple', caption = "Sequence availability for household members in analysis")

# Per analysis, the individuals with at least one record whose FLCORC reports
# detection; one row per NickID, flagged with Covid = 1L.
ever_positive <- map(dat_to_analyse_, function(d) {
    d %>%
        filter(grepl(pattern = "SARS CoV-2 detected in this sample",
                     x       = FLCORC)) %>%
        distinct(NickID) %>%
        mutate(Covid = 1L)
})

# Share of individuals, by status, who appear in `ever_positive` for the
# corresponding analysis.
map2_df(.x  = dat_to_analyse_,
        .y  = ever_positive,
        .id = "Analysis",
        .f  = function(d, pos) {
            d %>%
                distinct(NickID, STATUS) %>%
                left_join(pos, by = 'NickID') %>%
                group_by(STATUS) %>%
                # never-positive individuals have Covid = NA after the join
                summarise(n = sum(Covid, na.rm = TRUE),
                          N = n())
        }) %>%
    group_by(Analysis, STATUS) %>%
    transmute(value = sprintf("%s/%s (%0.0f%%)", n, N, 100*n/N)) %>%
    mutate(STATUS = str_to_title(STATUS)) %>%
    spread(STATUS, value) %>%
    knitr::kable(format = 'simple', caption = 'Proportion of individuals who are ever positive')

# Among ever-positive individuals, how many have at least one sequence?
map2_df(.x  = dat_to_analyse_,
        .y  = ever_positive,
        .id = "Analysis",
        .f  = function(d, pos) {
            d %>%
                distinct(NickID, STATUS) %>%
                left_join(has_seq, by = 'NickID') %>%
                left_join(pos,     by = 'NickID') %>%
                # restrict the denominator to ever-positive individuals
                filter(Covid == 1L) %>%
                group_by(STATUS) %>%
                summarise(n = sum(ERR, na.rm = TRUE),
                          N = n())
        }) %>%
    group_by(Analysis, STATUS) %>%
    transmute(value = sprintf("%s/%s (%0.0f%%)", n, N, 100*n/N)) %>%
    mutate(STATUS = str_to_title(STATUS)) %>%
    spread(STATUS, value) %>%
    kable(format = 'simple',
          align = 'lrr',
          caption = 'Proportion of ever-positive individuals with at least one sequenced swab')

# household size
#
# Summary statistics (mean, SD, quantiles, mode) of the number of contacts per
# household, one row per analysis. Each household contributes one value.

dat_to_analyse_ %>%
    bind_rows(.id = "Analysis") %>%
    # NOTE(review): only the first character of StatedHouseholdSize is parsed,
    # so a stated size of 10 or more would be read as 1 -- confirm the raw
    # values never exceed one digit.
    mutate(StatedHouseholdSize = parse_integer(
        substr(StatedHouseholdSize, 1, 1)),
        # one less than the stated size; the caption calls this the number of
        # contacts
        StatedHouseholdSize = StatedHouseholdSize - 1) %>%
    distinct(Analysis, HOCONUMBER, StatedHouseholdSize) %>%
    filter(StatedHouseholdSize > 0) %>%
    nest(data = -Analysis) %>%
    mutate(Mean = map_dbl(data, ~mean(    .x$StatedHouseholdSize)),
           SD   = map_dbl(data, ~sd(      .x$StatedHouseholdSize)),
           Q    = map(data,     ~quantile(.x$StatedHouseholdSize)),
           Mode = map_dbl(data, function(x) {
               # If several sizes tie for most frequent, report the smallest;
               # map_dbl() needs exactly one value per analysis and would
               # otherwise stop with an error on a tie.
               count(x, StatedHouseholdSize) %>%
                   filter(n == max(n)) %>%
                   pull(StatedHouseholdSize) %>%
                   min()
           }))  %>%
    unnest_wider(Q) %>%
    select(-data) %>%
    kable(format = 'simple', digits = 2, 
          caption = 'Summary statistics of number of contacts in each household')


# Number of rows per Variant value in each analysis (the 'Flat priors'
# analyses are left out).

Variants[!grepl(pattern = 'Flat priors', x = names(Variants))] %>%
    map(function(v) count(v, Variant))

# missing lineages - what are they?
#
# For rows whose Variant is "Uninfected", "Unknown" or "Not sequenced", assign
# a lineage from p_delta (<= 0.5 -> "Alpha", > 0.5 -> "Delta") and
# cross-tabulate it against Variant, with row and column totals.
Variants %>%
    {.[!grepl(pattern = 'Flat priors', x = names(.))]} %>%
    map(~filter(.x, Variant %in% c("Uninfected", "Unknown", "Not sequenced")) %>%
            mutate(Variant2 = cut(p_delta, c(0, 0.5, 1),
                                  right = TRUE, include.lowest = TRUE,
                                  labels = c("Alpha", "Delta"))) %>%
            count(Variant, Variant2) %>%
            # fill = 0L: a Variant with no rows for one lineage is a count of
            # zero; leaving it NA would turn Total and the Total row into NA
            spread(Variant2, n, fill = 0L) %>%
            mutate(Total = Alpha + Delta) %>%
            {bind_rows(., summarise_at(.,
                                       .vars = vars(-Variant),
                                       .funs = sum) %>%
                           mutate(Variant = "Total"))})

# Household lineage (Alpha/Delta, assigned from p_delta) tabulated against the
# vaccine product and number of doses recorded for the CASE table of each
# analysis, with a TOTAL row per analysis.
vaccines %>%
    {.[!grepl(pattern = 'Flat priors', x = names(.))]} %>%
    map_df(
        ~distinct(.x$CASE, HOCONUMBER,
                  vax_CASE = vax, 
                  dose_CASE = dose)  %>%
            # NOTE(review): every analysis is joined to Variants$Sensitivity,
            # whichever analysis is being tabulated -- confirm this is intended.
            inner_join(Variants$Sensitivity,
                       by = 'HOCONUMBER') %>%
            # p_delta <= 0.5 -> "Alpha", p_delta > 0.5 -> "Delta"
            mutate(Variant2 = cut(
                x              = p_delta,
                breaks         = c(0, 0.5, 1),
                include.lowest = TRUE,
                right          = TRUE,
                labels         = c("Alpha", "Delta")
            )) %>% count(vax_CASE, Variant2, dose_CASE) %>%
            # fill = 0L: an absent vaccine/dose x lineage combination is a
            # count of zero; leaving it NA would make the Total row NA
            spread(Variant2, n, fill = 0L) %>%
            {bind_rows(., summarise_at(., 
                                       .vars = vars(Alpha, Delta),
                                       .funs = sum) %>%
                           mutate(vax_CASE = "Total",
                                  dose_CASE = " "))},
        .id = "Analysis") %>%
    mutate_at(.vars = vars(Analysis, vax_CASE),
              .funs = unfill_vec, replace_value = "") %>%
    mutate(vax_CASE = sub(pattern = 'Total', replacement = "TOTAL", x = vax_CASE)) %>%
    rename(`Case vacc.` = vax_CASE,
           `Case doses` = dose_CASE) %>%
    knitr::kable(format = 'simple', caption = "Household lineage by index case vaccine history")

# Maximum time between the last vaccine dose and an individual's earliest
# sample, per vaccine product and number of doses (Main analysis only).
dat_to_analyse_$Main %>%
    filter(ReCodedVaccineType != "None") %>%
    group_by(NickID, STATUS, ReCodedVaccineType, DosesCovidVacc) %>%
    # earliest sample for each individual
    slice_min(SAMPLE_DT, n = 1) %>%
    # one dose -> time since the first dose; otherwise since the second.
    # ifelse() drops the difftime class, leaving a bare number.
    # NOTE(review): labelled as days below, which assumes the date columns
    # are of class Date -- confirm.
    transmute(time = ifelse(DosesCovidVacc == "One",
                            SAMPLE_DT - VaccDate1,
                            SAMPLE_DT - VaccDate2)) %>%
    ungroup %>%
    # Drop missing times first so one NA cannot wipe out a whole group, then
    # summarise so tied maxima give a single row per vaccine/dose combination.
    filter(!is.na(time)) %>%
    group_by(DosesCovidVacc, ReCodedVaccineType) %>%
    summarise(time = max(time), .groups = 'drop') %>%
    mutate(time = sprintf("%s days", time)) %>%
    select(Vaccine = ReCodedVaccineType,
           Doses = DosesCovidVacc,
           Time = time) %>%
    arrange(Vaccine, Doses) %>%
    mutate(Vaccine = unfill_for_table(Vaccine)) %>%
    kable(format  = 'simple',
          align   = 'llr',
          caption = "Maximum time since last dose of vaccine, by vaccine product")

# Average number of days between the last vaccine dose and the earliest
# sample, by status, vaccine (doses + product) and age group. Main and
# Sensitivity results are merged into one cell: the Sensitivity value is shown
# in brackets only where it differs from Main. The table is also written to
# results/days.csv.
dat_to_analyse_ %>% 
    map_df(.x = .,
           ~mutate(.x,
                   Age = cut(RoundedAge, right = FALSE,
                             breaks = c(0,18,50,65,Inf), 
                             labels = c("<18", "18-49", "50-64", "65+"))) %>%
               arrange(Age) %>%
               mutate(Age = fct_inorder(Age)) %>%
               distinct(NickID, STATUS, 
                        ReCodedVaccineType, DosesCovidVacc, Age,
                        VaccDate1, VaccDate2, SAMPLE_DT) %>%
               group_by(NickID) %>%
               # earliest sample per individual, vaccinated individuals only
               filter(SAMPLE_DT == min(SAMPLE_DT),
                      DosesCovidVacc != "Zero") %>%
               mutate_at(.vars = vars(VaccDate1, VaccDate2, SAMPLE_DT),
                         .funs = as.Date) %>%
               # last dose = second dose when recorded, otherwise the first
               mutate(LastVaccDate = case_when(!is.na(VaccDate2) ~ VaccDate2,
                                               TRUE              ~ VaccDate1)) %>%
               mutate(diff = SAMPLE_DT - LastVaccDate)  %>%
               # NOTE(review): a missing interval is counted as 0 days, which
               # pulls the group mean down -- confirm this is intended.
               replace_na(list(diff = 0)) %>%
               group_by(STATUS, DosesCovidVacc, ReCodedVaccineType, Age) %>%
               #tally %>%
               summarise(Days = as.numeric(round(mean(diff, na.rm = TRUE))),
                         .groups = 'drop') %>%
               mutate(STATUS = str_to_title(STATUS)) %>%
               mutate(key = case_when(
                   DosesCovidVacc == "Zero" ~ "None",
                   DosesCovidVacc == "One"  ~ sprintf("1 %s", ReCodedVaccineType),
                   DosesCovidVacc == "Two"  ~ sprintf("2 %s", ReCodedVaccineType),
                   TRUE                     ~ NA_character_)) %>%
               arrange(STATUS, ReCodedVaccineType) %>%
               select(-ReCodedVaccineType, -DosesCovidVacc) %>%
               mutate(key = factor(key),
                      key = fct_inorder(key)),
           .id = "Analysis") %>%
    filter(!is.na(Days)) %>%
    spread(Analysis, Days, fill = NA) %>%
    mutate(diff = Main - Sensitivity) %>%
    # Guard against NA: if a group is missing from one analysis, diff is NA
    # and an unguarded ifelse() would return NA even when Main has a value.
    mutate(value = ifelse(!is.na(diff) & diff != 0,
                          sprintf("%s (%s)", Main, Sensitivity),
                          Main)) %>%
    select(-Main, -Sensitivity, -diff) %>% 
    arrange(key) %>%
    spread(Age, value, fill = NA) %>%
    rename(Status = STATUS, Vaccine = key)  %>%
    mutate(Status = unfill_for_table(Status)) %T>%
    write_csv('results/days.csv') %>%
    knitr::kable(., booktabs = TRUE, 
                 linesep = "", align = 'llrrrr',
                 format = 'simple', caption = 'Average number of days since last vaccination for each vaccine status and age group. Numbers in brackets show the average duration in the sensitivity analysis where it differs from the main analysis. There are no vaccinated individuals younger than 18') 
