# hhSAR summary statistics (for paper)

## How many contacts are there across the households?
## Contacts per household are taken as the stated household size minus one
## (presumably to exclude the index case -- confirm).
## NOTE(review): only the first character of StatedHouseholdSize is parsed,
## so a two-digit size such as "10" would be read as 1 -- confirm against
## how this variable is coded.

dat_clean %>%
    mutate(StatedHouseholdSize = parse_integer(
        substr(StatedHouseholdSize, 1, 1)),
        StatedHouseholdSize = StatedHouseholdSize - 1) %>%
    # one row per distinct HOCONUMBER / size pair
    distinct(HOCONUMBER, StatedHouseholdSize) %>%
    # comparison with NA is dropped by filter(), so unparsable sizes vanish here
    filter(StatedHouseholdSize > 0) %>%
    # nest everything into a single list-column so the summaries below can be
    # computed side by side with map_*()
    nest(data = everything()) %>%
    mutate(n    = map_dbl(data, nrow),
           Mean = map_dbl(data, ~mean(.x$StatedHouseholdSize)),
           SD   = map_dbl(data, ~sd(.x$StatedHouseholdSize)),
           Q    = map(data, ~quantile(.x$StatedHouseholdSize)),
           Mode = map_dbl(data, function(x){
               # map_dbl() requires exactly one value per element; when
               # several sizes tie for most frequent, report the smallest
               # instead of failing
               tallied <- count(x, StatedHouseholdSize)
               min(tallied$StatedHouseholdSize[tallied$n == max(tallied$n)])
           })
    )  %>%
    # spread the named quantile vector into one column per quantile
    unnest_wider(Q)  %>%
    select(Mean, Mode, SD, contains("%"), n) %>%
    knitr::kable(., format = 'simple', digits = 1, 
                 caption = "Number of household contacts")

# Number of cases: count of distinct HOCONUMBER values in each element of
# vas_dat, stacked into one table with the element name in `Analysis`
vas_dat %>%
    map_dfr(
        function(d) {
            d %>%
                distinct(HOCONUMBER) %>%
                summarise(n = n())
        },
        .id = "Analysis"
    ) %>%
    knitr::kable(format = "simple",
                 caption = "Number of household cases")

# Number of contacts: count of distinct NickID_CONTACT values in each element
# of vas_dat, stacked into one table with the element name in `Analysis`
vas_dat %>%
    map_dfr(
        function(d) {
            d %>%
                distinct(NickID_CONTACT) %>%
                summarise(n = n())
        },
        .id = "Analysis"
    ) %>%
    knitr::kable(format = "simple",
                 caption = "Number of household contacts")

# Number of contacts and cases infected, per analysis, restricted to the
# HOCONUMBER values present in the matching element of vas_dat.
# n = people with at least one positive result, N = people, p = n/N.

map2(.x = dat_to_analyse, .y = vas_dat,
     # no `by` is given, so the join uses whichever columns the two tables
     # share (HOCONUMBER, assuming .y carries no grouping columns -- confirm)
     .f = ~inner_join(.x, distinct(.y, HOCONUMBER)) %>%
         # 1 when the result text reports detection, 0 otherwise
         mutate(Covid = as.integer(grepl(pattern = "SARS CoV-2 detected in this sample", x = FLCORC)))) %>%
    map_df(~group_by(.x, STATUS, NickID) %>%
            # collapse to one row per person: infected if any row is positive
            summarise(Covid = max(Covid, na.rm = TRUE)) %>%
            group_by(STATUS) %>%
            summarise(n = sum(Covid),
                      N = n()),
           .id = "Analysis") %>%
    mutate(p = percent(n/N, accuracy = 1))


# Number of contacts and cases infected, using every row of each element of
# dat_to_analyse (no restriction to the households listed in vas_dat).
# Returns a list with one table per analysis: n = people with at least one
# positive result, N = people.

dat_to_analyse %>%
    map(~mutate(.x,
                # 1 when the result text reports detection, 0 otherwise
                Covid = as.integer(grepl(pattern = "SARS CoV-2 detected in this sample",
                                         x = FLCORC))) %>%
            group_by(NickID, STATUS) %>%
            # collapse to one row per person: infected if any row is positive
            summarise(Covid = max(Covid, na.rm = TRUE)) %>%
            group_by(STATUS) %>%
            summarise(n = sum(Covid),
                      N = n()))


# How many rows of vax_and_swabs fall in each combination of case and contact
# vaccine status? Cells show the Main count, followed in brackets by the
# additional count seen only in the Sensitivity analysis.
# NOTE(review): assumes the elements of vax_and_swabs are named "Main" and
# "Sensitivity" and that names(min_SIs) matches those names -- confirm.

vax_and_swabs %>%
    map(~ungroup(.x) %>%
            # shorten the dose labels so the pasted status reads "1 <vaccine>"
            # or "2 <vaccine>"; "Zero" becomes blank and is trimmed away below
            mutate_at(.vars = vars(dose_CONTACT, dose_CASE),
                      .funs = function(x){
                          fct_recode(x, 
                                     " " = "Zero", 
                                     "1" = "One",
                                     "2" = "Two")}) %>%
            transmute(CONTACT = paste(dose_CONTACT, vax_CONTACT),
                      CASE    = paste(dose_CASE,    vax_CASE)) %>%
            mutate_all(.funs = trimws) %>%
            group_by(CASE, CONTACT) %>%
            count %>%
            ungroup) %>%
    bind_rows(., .id = "Min. SI") %>%
    # fill = 0: a combination that occurs in only one analysis must count as
    # zero in the other; without it the difference and the cell become NA
    spread(`Min. SI`, n, fill = 0) %>%
    mutate(diff = Sensitivity - Main) %>%
    mutate(value = ifelse(diff > 0,
                          sprintf("%s (%s)", Main, diff),
                          Main)) %>%
    select(-one_of(names(min_SIs)), -diff) %>%
    # fix the display order: "1 ChAdOx1", "2 ChAdOx1", "1 BNT162b2",
    # "2 BNT162b2", "None"; any other label becomes NA
    mutate_at(.vars = vars(CASE, CONTACT),
              .funs = ~factor(., levels = c(paste(rep(c(1,2), 
                                                      times = 2),
                                                  rep(c("ChAdOx1", "BNT162b2"),
                                                      each = 2)), "None"))) %>%
    arrange(CONTACT, CASE) %>%
    # one column per contact status, one row per case status
    spread(CONTACT, value) %>%
    rename(Case = CASE) %>%
    knitr::kable(., booktabs = TRUE, 
                 linesep = "",
                 format = 'simple',
                 caption = "Number of contacts with listed vaccine status for each case vaccine status. Numbers in brackets show the additional individuals included in the sensitivity analysis") 

# How many cases and contacts in total are vaccinated?
# Built twice, once per minimum age (0 = "All", 18 = "Adults only"), then
# stacked with the age-group label in `Age group`. Each cell is formatted as
# "Main (Sensitivity)".
# NOTE(review): assumes `vaccines` is a list of analyses named "Main" and
# "Sensitivity", each holding one table per STATUS -- confirm.

{map(.x = list(`All` = 0,
               `Adults only` = 18),
     # outer .x is the minimum age; the inner lambda's .x is one element of
     # `vaccines`. The vaccines table is rebuilt for each age threshold.
     .f = ~filter(vaccines %>%
                      map_df(~bind_rows(.x, .id = "STATUS"), .id = "Analysis") %>%
                      mutate(Vaccinated = ifelse(dose == "Zero",
                                                 "Unvaccinated",
                                                 "Vaccinated")) %>%
                      # no `by` given: joins on the columns shared with
                      # dat_clean's NickID/RoundedAge pairs
                      inner_join(distinct(dat_clean, NickID, RoundedAge)),
                  RoundedAge >= .x))} %>%
    map_df(
        ~count(.x, Analysis, STATUS, Vaccinated) %>%
            # percentages are within each Analysis/STATUS combination
            group_by(Analysis, STATUS) %>%
            mutate(N = sum(n),
                   p = percent(n/N,accuracy = 1)) %>%
            select(-N) %>% 
            # go long over n and p, then wide over Analysis, so that Main and
            # Sensitivity sit side by side for pasting
            gather(key, value, n, p) %>%
            spread(Analysis, value) %>%
            group_by(STATUS, Vaccinated, key) %>%
            transmute(value = sprintf("%s (%s)", Main, Sensitivity)) %>%
            # back to one column each for n and p
            spread(key, value) %>%
            rename(Status = STATUS,
                   Number = n,
                   Percent = p) %>%
            mutate(Status = str_to_title(Status)),
        .id = "Age group") %>%
    ungroup %>%
    # unfill_for_table is defined elsewhere; presumably it blanks repeated
    # labels for display -- confirm
    mutate_at(.vars = vars(`Age group`, Status),
              .funs = unfill_for_table)  %>%
    knitr::kable(format = 'simple', align = 'lllrr')

# Symptomatic status of cases, per analysis: a case with a missing
# CASESymptomStartDate is labelled asymptomatic, otherwise symptomatic
dat_to_analyse %>%
    map_df(
        function(d) {
            d %>%
                filter(STATUS == "CASE") %>%
                distinct(NickID, CASESymptomStartDate) %>%
                mutate(Status = ifelse(!is.na(CASESymptomStartDate),
                                       "Symptomatic",
                                       "Asymptomatic")) %>%
                count(Status) %>%
                # share of each label within this analysis
                mutate(p = percent(n / sum(n), accuracy = .1)) %>%
                rename(Number = n,
                       Percentage = p)
        },
        .id = "Analysis"
    ) %>%
    kable(format = "simple",
          align = "llrr",
          caption = "Number of cases which ever develop symptoms")


# Symptomatic status of cases in the Main analysis, broken down by vaccine
# status (dose count plus vaccine type); cells are "count (percent)" with the
# percentage taken within each vaccine status
dat_to_analyse$Main %>%
    filter(STATUS == "CASE") %>%
    mutate(
        key = case_when(
            DosesCovidVacc == "Zero" ~ "Unvaccinated",
            DosesCovidVacc == "One"  ~ sprintf("One dose %s",  ReCodedVaccineType),
            DosesCovidVacc == "Two"  ~ sprintf("Two doses %s", ReCodedVaccineType),
            TRUE                     ~ NA_character_
        )
    ) %>%
    # sort by vaccine type so the label levels below follow that order
    arrange(ReCodedVaccineType) %>%
    select(-ReCodedVaccineType, -DosesCovidVacc) %>%
    mutate(key = fct_inorder(factor(key))) %>%
    ungroup() %>%
    select(NickID, CASESymptomStartDate, key) %>%
    distinct() %>%
    mutate(Status = ifelse(!is.na(CASESymptomStartDate),
                           "Symptomatic",
                           "Asymptomatic")) %>%
    count(Status, key) %>%
    # add zero rows for Status/key pairs that never occur
    complete(Status, nesting(key), fill = list(n = 0)) %>%
    group_by(key) %>%
    mutate(p = n / sum(n)) %>%
    arrange(key, Status) %>%
    group_by(Status, key) %>%
    transmute(value = sprintf("%s (%0.0f%%)", n, 100 * p)) %>%
    # one column per symptom status
    spread(key = Status, value = value) %>%
    rename(Status = key) %>%
    kable(format = "simple",
          align = "lrr",
          caption = "Proportion of cases who are symptomatic, by vaccine status")

# Symptomatic status of contacts, cross-tabulated against infection status,
# with row totals and a final totals row
dat_SI %>%
    # keep rows at or above the Main minimum serial interval, plus rows with
    # no serial interval recorded
    filter(SerialInterval >= min_SIs$Main | is.na(SerialInterval)) %>%
    mutate(Covid        = ifelse(Covid, "Contact infected", "Contact uninfected"),
           Asymptomatic = ifelse(Asymptomatic, "Contact asymptomatic", "Contact symptomatic")) %>%
    count(Covid, Asymptomatic) %>%
    # fill = 0: an infection/symptom combination with no rows must count as
    # zero, otherwise its NA propagates into Total and the totals row
    spread(Asymptomatic, n, fill = 0) %>%
    mutate(Total = `Contact asymptomatic` + `Contact symptomatic`) %>%
    # append a row holding the column sums
    {bind_rows(.,
               summarise_at(., 
                            .vars = vars(-Covid),
                            .funs = sum) %>%
                   mutate(Covid = "Total"))} %>% 
    kable(format = 'simple',
          caption = "Number of cases and contacts who are symptomatic")

# Variant counts and percentages; each cell is formatted as
# "Main (Sensitivity)"

Variants %>%
    map_df(function(d) count(d, Variant), .id = "Analysis") %>%
    group_by(Analysis) %>%
    # share of each variant within its analysis
    mutate(p = percent(n / sum(n), accuracy = 1)) %>%
    ungroup() %>%
    # long over n and p, then wide over Analysis, so the two analyses sit
    # side by side for pasting
    gather(key = key, value = value, n, p) %>%
    spread(key = Analysis, value = value) %>%
    group_by(Variant, key) %>%
    transmute(value = sprintf("%s (%s)", Main, Sensitivity)) %>%
    spread(key = key, value = value) %>%
    rename(Number = n,
           Percent = p) %>%
    kable(format = "simple",
          align = "lrr",
          caption = "Number of cases, either index or secondary, by variant (sensitivity)")

# Quantiles of the skew-normal distribution fitted to the serial intervals,
# one fit per entry of min_SIs.
# Results can vary slightly between runs because the fitting algorithm is
# a little stochastic.
min_SIs %>%
    map(function(min_si) {
        fit_sn_to_serial_interval(filter(dat_SI, Covid == 1), min_si)
    }) %>%
    # keep only the `estimate` element of each fit
    map("estimate") %>%
    map_df(function(est) get_sn_quantiles_from_parms(est, digits = 0),
           .id = "Analysis") %>%
    kable(format = "simple",
          align = "lrrr",
          caption = "Quantiles of skew-normal distribution of serial intervals")

# Empirical 2.5%, 50% and 97.5% quantiles of the observed serial intervals,
# for rows with Covid == 1 at or above each minimum in min_SIs
min_SIs %>%
    map_df(function(si_floor) {
               filter(dat_SI, Covid == 1, SerialInterval >= si_floor)
           },
           .id = "Analysis") %>%
    nest(data = -Analysis) %>%
    mutate(Q = map(data,
                   function(d) {
                       quantile(d$SerialInterval,
                                probs = c(0.025, 0.5, 0.975))
                   })) %>%
    select(-data) %>%
    # one column per quantile
    unnest_wider(Q) %>%
    kable(format = "simple",
          align = "lrrr",
          caption = "Quantiles of observed serial intervals")

# Table of individuals by status (case/contact), vaccine status and age group.
# Cells show the Main count, followed in brackets by the additional count
# found only in the Sensitivity analysis.
# NOTE(review): assumes dat_to_analyse has elements named "Main" and
# "Sensitivity" -- confirm.
dat_to_analyse %>% 
    map_df(.x = .,
           ~mutate(.x,
                   # left-closed age bands: [0,18), [18,50), [50,65), [65,Inf)
                   Age = cut(RoundedAge, right = FALSE,
                             breaks = c(0,18,50,65,Inf), 
                             labels = c("<18", "18-49", "50-64", "65+"))) %>%
               arrange(Age) %>%
               mutate(Age = fct_inorder(Age)) %>%
               # one row per person and attribute combination, then count
               # people within every combination of the remaining columns
               distinct(NickID, STATUS, 
                        ReCodedVaccineType, DosesCovidVacc, Age) %>%
               group_by_at(.vars = vars(-NickID)) %>%
               tally %>%
               ungroup %>%
               mutate(STATUS = str_to_title(STATUS)) %>%
               # vaccine label: "None", "1 <type>" or "2 <type>"; any other
               # dose value gives NA
               mutate(key = case_when(
                   DosesCovidVacc == "Zero" ~ "None",
                   DosesCovidVacc == "One"  ~ sprintf("1 %s", ReCodedVaccineType),
                   DosesCovidVacc == "Two"  ~ sprintf("2 %s", ReCodedVaccineType),
                   TRUE                     ~ NA_character_)) %>%
               # sort first so fct_inorder() below fixes the label order
               arrange(STATUS, ReCodedVaccineType) %>%
               select(-ReCodedVaccineType, -DosesCovidVacc) %>%
               mutate(key = factor(key),
                      key = fct_inorder(key)),
           .id = "Analysis")%>%
    # one column per analysis; combinations absent from an analysis count as 0
    spread(Analysis, n, fill = 0) %>%
    mutate(diff = Sensitivity - Main) %>%
    mutate(value = ifelse(diff > 0,
                          sprintf("%s (%s)", Main, diff),
                          Main)) %>%
    # one_of() only warns, rather than erroring, when a listed column
    # (e.g. "Flat priors") is not present
    select(-one_of("Main", "Sensitivity", "Flat priors", "diff")) %>% 
    arrange(key) %>%
    # one column per age band; empty cells shown as 0
    spread(Age, value, fill = 0) %>%
    rename(Status = STATUS, Vaccine = key)  %>%
    # unfill_for_table is defined elsewhere; presumably it blanks repeated
    # labels for display -- confirm
    mutate(Status = unfill_for_table(Status)) %>%
    knitr::kable(., booktabs = TRUE, 
                 linesep = "", align = 'llrrrr',
                 format = 'simple', caption = 'Number of index cases and their household contacts with listed vaccine status for each age group. Numbers in brackets show the additional individuals included in the sensitivity analysis. There are no index cases younger than 18') 

# Age distribution: minimum, 2.5%, median, 97.5% and maximum of RoundedAge
# for each combination of vaccine type and status in dat_clean

dat_clean %>%
    distinct(NickID, RoundedAge, ReCodedVaccineType, STATUS) %>%
    nest(data = -c(ReCodedVaccineType, STATUS)) %>%
    mutate(Q = map(data,
                   function(d) {
                       quantile(d$RoundedAge,
                                probs = c(0, 0.025, 0.5, 0.975, 1))
                   })) %>%
    select(-data) %>%
    # one column per quantile
    unnest_wider(Q) %>%
    rename(Vaccine = ReCodedVaccineType,
           Status  = STATUS) %>%
    mutate(
        # fixes the display order; any other vaccine label becomes NA
        Vaccine = factor(Vaccine, levels = c("None", "ChAdOx1", "BNT162b2")),
        Status  = str_to_title(Status)
    ) %>%
    arrange(Status, Vaccine) %>%
    kable(format = "simple",
          digits = 0,
          caption = "Age distribution by vaccine product")


