# hhSAR PCR analysis

dat_for_pcr_summaries <- dat_to_analyse$Main %>%
    amend_vaccination_status %>%
    #filter(STATUS == "CASE") %>%
    mutate(
        SymptomStartDate = 
            case_when(
                is.na(CASESymptomStartDate)    &
                    STATUS == "CASE"    ~ NA_Date_,
                is.na(CONTACTSymptomStartDate) & 
                    STATUS == "CONTACT" ~ NA_Date_,
                STATUS == "CASE"        ~ as.Date(CASESymptomStartDate),
                TRUE                    ~ as.Date(CONTACTSymptomStartDate))) %>%
    mutate(Covid = 0L + grepl(pattern = "SARS CoV-2 detected in this sample", x = FLCORC)) %>%
    select(NickID,ReCodedVaccineType, SymptomStartDate,
           DosesCovidVacc, STATUS,
           SAMPLE_DT, FLCOEC, FLCO8C, Covid) %>%
    replace_na(list(FLCOEC = 40, FLCO8C = 40)) %>%
    group_by(NickID) %>%
    mutate(EverCovid = max(Covid)) %>%
    filter(EverCovid == 1 | STATUS == "CASE") %>%
    select(-EverCovid)

# symptomatic cases and contacts
dat_for_pcr_summaries %>%
    mutate(HOCONUMBER = sub(pattern = '(^.*)/.*',
                            replacement = "\\1", 
                            x = NickID)) %>%
    mutate(HOCONUMBER = trimws(HOCONUMBER)) %>%
    left_join(Variants$Sensitivity)  %>%
    mutate(Variant2 = cut(p_delta, c(0, 0.5, 1), right = T, include.lowest = T,
                          labels = c("Alpha", "Delta"))) %>%
    group_by(NickID, STATUS, Variant2) %>%
    summarise_at(.vars = vars(SymptomStartDate, Covid), max) %>%
    filter(Covid == 1) %>%
    group_by(STATUS, Variant2) %>%
    summarise(x = sum(is.na(SymptomStartDate)),
              n = n(), 
              .groups = 'drop') %>%
    mutate(test = map2(.x = x, .y = n, ~binom.test(x = .x, n = .y))) %>%
    mutate(CI = map(test, 'conf.int'),
           CI = map(CI, ~setNames(.x, c("Lower", "Upper")))) %>%
    unnest_wider(CI) %>%
    mutate(M = x/n) %>%
    mutate_at(.vars = vars(Lower, Upper, M),
              .funs = percent, accuracy = 1) %>%
    # group_by(STATUS, Variant2) %>%
    mutate(Asymptomatic = sprintf("%s (%s, %s)", M, Lower, Upper)) %>%
    select(Status = STATUS,
           Variant = Variant2, 
           x, n, Asymptomatic) %>%
    mutate(Status = unfill_vec(str_to_title(Status), "")) %>%
    kable(format = 'simple',
          caption = "Percentage of cases and contacts who are asymptomatic, by household variant")

dat_PCR <- dat_for_pcr_summaries %>%
    gather(key, value, contains("FLC")) %>%
    group_by(NickID) %>%
    mutate(t = as.numeric(SAMPLE_DT - SymptomStartDate)) %>%
    mutate(y = (40 - value)/40) %>% 
    nest(data = -c(key, ReCodedVaccineType, DosesCovidVacc))


dat_PCR_pred_t <- data.frame(t = seq(0, 20, by = 1))

dat_for_pcr_summaries %>%
    gather(key, value, contains("FLC")) %>%
    nest(data = -c(key, ReCodedVaccineType, DosesCovidVacc)) %>%
    mutate(min_PCR = map(data, ~group_by(.x, NickID) %>%
                             summarise(value = min(value)))) %>%
    mutate(Q = map(min_PCR, ~mean_cl_boot(.x$value))) %>%
    unnest_wider(Q)  %>%
    mutate_at(.vars = vars(starts_with('y')),
              .funs = as.numeric) %>%
    as_tibble %>%
    mutate(CI = paste0(y, ymin, ymax),
           CI = sprintf("%0.1f (%0.1f, %0.1f)", y, ymin, ymax)) %>%
    select(-starts_with("y"),
           -min_PCR,
           -data) %>%
    spread(key, CI) %>%
    rename(Vaccine = ReCodedVaccineType,
           Doses   = DosesCovidVacc) %>%
    mutate(Vaccine = unfill_vec(as.character(Vaccine), "")) %>%
    kable(format = 'simple', caption = "Peak measured PCR values by vaccine status")

dat_for_pcr_summaries %>%
    group_by(NickID, ReCodedVaccineType, DosesCovidVacc) %>%
    summarise(Covid = tail(Covid,1), .groups = 'drop') %>%
    group_by_at(.vars = vars(-Covid, -NickID)) %>%
    nest %>%
    mutate(x = map_dbl(.x = data, ~sum(.x$Covid)),
           n = map_dbl(.x = data, nrow),
           p = x/n) %>%
    mutate(prop.test = map2(.x = x, .y = n, ~prop.test(x = .x, n = .y))) %>%
    mutate(CI = map(.x = prop.test, tidy))  %>%
    unnest_wider(CI) %>%
    
    mutate(DosesCovidVacc     = factor(DosesCovidVacc, 
                                       levels  = c("Zero", "One", "Two"),
                                       ordered = T),
           ReCodedVaccineType = factor(ReCodedVaccineType,
                                       levels  = c("None", "ChAdOx1", "BNT162b2"),
                                       ordered = T)) %>%
    arrange(ReCodedVaccineType, DosesCovidVacc) %>%
    ungroup %>%
    mutate(Positivity = sprintf("%0.2f (%0.2f, %0.2f)", p, conf.low, conf.high)) %>%
    select(Vaccine  = ReCodedVaccineType,
           Doses    = DosesCovidVacc,
           Positive = x, 
           Total    = n,
           Positivity) %>%
    mutate(Vaccine = unfill_vec(as.character(Vaccine), "")) %>%
    knitr::kable(., format =  'simple', align = 'llrrr',
                 caption = "PCR ever-positivity by vaccine history") 

# proportion still positive, a week in
#
# an attempt with nesting
# symptomatic only
# want to investigate a bayesian model

pcr_pos_models_data <- 
    dat_for_pcr_summaries %>%
    ungroup %>%
    na.omit %>%
    mutate(time = as.numeric(SAMPLE_DT - SymptomStartDate),
           NickID = factor(NickID))


pcr_pos_points <- dat_for_pcr_summaries %>%
    rename(Doses = DosesCovidVacc,
           Vax   = ReCodedVaccineType)  %>%
    mutate(label = ifelse(Doses == "Zero", 
                          "Unvaccinated", "Vaccinated")) %>%
    mutate(time = as.numeric(SAMPLE_DT - SymptomStartDate))


# try a Bayesian model with quadratic effect over time and sampling missing value
pcr_pos_models_dat <- pcr_pos_models_data %>%
    {bind_rows(., mutate(.,
                         FLCOEC    = 40, 
                         FLCO8C    = 40, 
                         Covid     = 0,
                         SAMPLE_DT = NA,
                         time      = NA) %>% 
                   distinct)} %>%
    group_by(NickID) %>%
    mutate(min_time = min(time, na.rm=T)) %>%
    ungroup %>%
    mutate(min_time = -pmin(0, min_time)) %>%
    arrange(NickID, SAMPLE_DT) %>%
    mutate(HOCONUMBER = sub(pattern = "/.*", replacement = "", x = NickID)) %>%
    inner_join(Variants$Main, by = 'HOCONUMBER') %>%
    mutate(NickID = droplevels(NickID)) %>%
    mutate(group = as.numeric(factor(interaction(DosesCovidVacc,
                                                 ReCodedVaccineType,
                                                 sep = " ")))) %>%
    ungroup %>%
    with(., list(Covid   = Covid, 
                 ID      = as.numeric(NickID),
                 time    = time,
                 time_na = which(is.na(time)), 
                 min_time = min_time[which(is.na(time))],
                 group   = group,
                 vax     = recode_vax(ReCodedVaccineType),
                 dose    = recode_doses(DosesCovidVacc),
                 p_delta = p_delta,
                 n       = length(time),
                 ng      = length(unique(group)),
                 nt      = sum(is.na(time)),
                 nV      = 2))

pcr_pos_models_dat_pred <-
    pcr_pos_models_dat[c("group",'vax','dose')] %>%
    as.data.frame %>%
    distinct %>%
    arrange(group) %>%
    rename_all(.funs = ~paste0(.,"_pred")) %>%
    crossing(time_pred = seq(-14,21,by=1),
             Variant   = c("Alpha", "Delta")) %>%
    mutate(d_pred = (Variant == "Alpha") + 1L)

pcr_pos_models_dat %<>% 
    append(values = as.list(pcr_pos_models_dat_pred)) %>%
    append(values = list(n_pred = nrow(pcr_pos_models_dat_pred))) %>%
    append(values = list(nID = length(unique(.$ID)))) 

# what's the 95% range for modelled incubation period?
rriskDistributions::get.gamma.par(p = c(0.5, 0.95), q = c(5.1,11.7),
                                  plot = FALSE, show.output = FALSE, ) %>%
    {qgamma(p = c(0.025, 0.975), shape = .['shape'], rate = .['rate'])} %>%
    round(., 1)

prior_matrix <- matrix(data = 0, 
                       ncol = pcr_pos_models_dat$nV, 
                       nrow = pcr_pos_models_dat$ng)

pcr_pos_models_mod <- jags.model(file  = 'hhSARposModelVariants.R',
                                 data  = pcr_pos_models_dat, 
                                 inits = list(beta.0 = prior_matrix,
                                              beta.1 = prior_matrix,
                                              beta.2 = prior_matrix))

pcr_pos_models_brn <- jags.samples(pcr_pos_models_mod,
                                   variable.names = c('beta.0',
                                                      'beta.1',
                                                      'beta.2'),
                                   n.iter = 5e3)

pcr_pos_models_pst <- coda.samples(pcr_pos_models_mod, 
                                   variable.names = c('beta.0',
                                                      'beta.1',
                                                      'beta.2'),
                                   n.iter = 5e3)

# plot(pcr_pos_models_pst, density = FALSE)

pcr_pos_models_prd <- coda.samples(pcr_pos_models_mod, 
                                   variable.names = 'p_pred',
                                   n.iter = 5e3)

pcr_pos_models_dat_predictions <- 
    tidy(pcr_pos_models_prd) %>% 
    bind_cols(pcr_pos_models_dat_pred) %>% 
    mutate(ReCodedVaccineType = decode_vax(vax_pred),
           DosesCovidVacc     = decode_doses(dose_pred),
           label = ifelse(DosesCovidVacc == "Zero", 
                          "Unvaccinated", "Vaccinated"),
           DosesCovidVacc     = to_number(as.character(DosesCovidVacc)))

pcr_pos_models_dat_predictions %>%
    unite(col = 'group', DosesCovidVacc, ReCodedVaccineType, sep = "_") %>%
    select(group, time_pred, mean)

# colours for ribbon
vax_palette_r <- c("Vaccinated"   = "grey60",
                   "Unvaccinated" = "grey80")

# colours for line
vax_palette_l <- c("Vaccinated"   = "grey40",
                   "Unvaccinated" = "grey60")

# linetype for line
vax_linetype <- c("Vaccinated"   = 1,
                  "Unvaccinated" = 2)


pcr_pos_plot_bayes <- 
    ggplot(data = pcr_pos_models_dat_predictions[dose_pred < 3],
           aes(x=time_pred, y = median )) +
    geom_ribbon(aes(ymin = `2.5%`, ymax = `97.5%`,
                    fill = Variant,
                    alpha = label,
                    group = Variant)) +
    geom_ribbon(data = 
                    pcr_pos_models_dat_predictions[dose_pred == 3] %>%
                    select(-DosesCovidVacc,
                           -ReCodedVaccineType),
                aes(ymin = `2.5%`, ymax = `97.5%`,
                    fill = Variant,
                    alpha = label,
                    group = Variant)) +
    geom_line(data =
                  pcr_pos_models_dat_predictions[dose_pred == 3] %>%
                  select(-DosesCovidVacc,
                         -ReCodedVaccineType),
              aes(color = Variant,
                  linetype = label)) +
    geom_line(aes(color = Variant,
                  linetype = label)) +
    facet_nested(Variant ~ ReCodedVaccineType + DosesCovidVacc, nest_line = T,
                 labeller = labeller(DosesCovidVacc = function(x){
                     ifelse(grepl(pattern = "1", x = x), "1 dose", paste(x, "doses"))
                 })) +
    theme(legend.position = 'bottom') +
    #geom_line(alpha = 0.25, aes(y = `Zero None`)) +
    xlab("Time since onset of symptoms (days)") +
    ylab("Proportion of ever-symptomatic\nindividuals currently PCR-positive") +
    #scale_alpha_continuous(range = c(0,1)) +
    scale_color_manual(values = lineage_cols) +
    scale_fill_manual(values  = lineage_cols) +
    scale_linetype_manual(values  = vax_linetype,
                          name = "Vaccination status") +
    scale_alpha_manual(values  = c("Vaccinated" = 0.4,
                                   "Unvaccinated" = 0.2),
                       name = "Vaccination status") +
    scale_x_continuous(limits = c(-14,21), breaks = seq(-14, 21, by =7)) +
    scale_y_continuous(breaks = seq(0,1,length.out = 5),
                       labels = function(x)sprintf("%g",x)) +
    theme(panel.grid.minor = element_blank(),
          axis.text.y = element_text(hjust = 0)) +
    guides(color = FALSE, fill = FALSE)

show_swabs <- FALSE

if (show_swabs){
    
    
    pcr_pos_points_bayes <- dat_for_pcr_summaries %>%
        mutate(label = ifelse(DosesCovidVacc == "Zero", 
                              "Unvaccinated", "Vaccinated")) %>%
        mutate(time_pred = as.numeric(SAMPLE_DT - SymptomStartDate),
               DosesCovidVacc = to_number(DosesCovidVacc)) %>%
        mutate(HOCONUMBER = sub(pattern = "/.*", replacement = "", x = NickID)) %>%
        inner_join(Variants$Main) %>%
        split(.$NickID) %>%
        map_df(~add_row(ungroup(.x), time_pred = NA, Covid = 0) %>%
                   fill(NickID, ReCodedVaccineType, DosesCovidVacc, label)) 
    
    # get the rugplot values for the missing variants
    missing_variant_for_pcr <- filter(pcr_pos_points_bayes, 
                                      !(Variant %in% c("Alpha", "Delta"))) %>%
        select(time_pred, p_delta, ReCodedVaccineType, DosesCovidVacc, label, Covid) %>% 
        filter(!is.na(time_pred)) %>%
        crossing(Variant = c("Alpha", "Delta")) %>%
        mutate(p_delta = ifelse(Variant == "Alpha", 1 - p_delta, p_delta))
    
    pcr_pos_plot_bayes <- pcr_pos_plot_bayes + 
        # observed
        geom_point(data = filter(pcr_pos_points_bayes, 
                                 DosesCovidVacc != 0,
                                 Variant %in% c("Alpha", "Delta")),
                   aes(color = label, y = Covid + 0.05*ifelse(Covid > 0.5,1,-1),
                       pch = "Sequenced"),
                   position = position_jitter(w = 0.25, h=0)) +
        geom_point(data = filter(pcr_pos_points_bayes, 
                                 DosesCovidVacc == 0,
                                 Variant %in% c("Alpha", "Delta")) %>%
                       select(-DosesCovidVacc, -ReCodedVaccineType),
                   aes(color = label, y = Covid  + 0.1*ifelse(Covid > 0.5,1,-1),
                       pch = "Sequenced"),
                   position = position_jitter(w = 0.25, h=0)) +
        
        # inferred
        geom_point( data = filter(missing_variant_for_pcr,
                                  DosesCovidVacc != 0),
                    aes(color = label, y = Covid + 0.15*ifelse(Covid > 0.5,1,-1),
                        alpha = p_delta,
                        pch = "Inferred"),
                    position = position_jitter(w = 0.25, h=0),
                    size = 1) +
        geom_point(data = filter(missing_variant_for_pcr,
                                 DosesCovidVacc == 0) %>%
                       select(-DosesCovidVacc, -ReCodedVaccineType),
                   aes(color = label, y = Covid  + 0.2*ifelse(Covid > 0.5,1,-1),
                       alpha = p_delta,
                       pch = "Inferred"),
                   position = position_jitter(w = 0.25, h=0),
                   size = 1) +
        guides(alpha = FALSE) +
        scale_shape_manual(values = c("Sequenced" = 124,
                                      "Inferred"  = 1),
                           name = "Variant")
}


ggsave(plot     = pcr_pos_plot_bayes, 
       filename = 'figures/pcr_pos_plot_bayes_variants.pdf', width = 8, height = 4,
       device   = cairo_pdf)


## relative risks
RRs <- mmcc:::mcmc_to_dt(pcr_pos_models_prd)[, row := parse_number(as.character(parameter))]
RRs %<>% merge(rownames_to_column(pcr_pos_models_dat_pred, "row") %>% 
                   mutate(row = parse_number(row)))

RRs_summarised <-
    RRs %>% select(-chain, -parameter, -row, -vax_pred, -dose_pred) %>%
    spread(group_pred, value) %>%
    mutate_at(.vars = vars(c(`2`, `3`, `4`, `5`)),
              .funs = ~multiply_by(., 1/`1`)) %>%
    select(-`1`) %>%
    gather(group_pred, value, c(`2`, `3`, `4`, `5`)) %>%
    group_by(group_pred,time_pred, Variant) %>%
    select(-d_pred) %>%
    nest() %>%
    mutate(Q = map(.x = data, ~quantile(.x$value, c(0.025, 0.5, 0.975)))) %>%
    unnest_wider(Q) %>%
    select(-data) %>% 
    mutate(group_pred = parse_number(group_pred)) %>%
    inner_join(pcr_pos_models_dat_pred) %>%
    mutate(ReCodedVaccineType = decode_vax(vax_pred),
           DosesCovidVacc     = to_number(decode_doses(dose_pred)))


pcr_pos_plot_RRs <-
    RRs_summarised  %>%
    ggplot(data = .,
           aes(x = time_pred, y = `50%`)) +
    geom_ribbon(aes(ymin = `2.5%`,
                    ymax = `97.5%`,
                    fill = Variant),
                alpha = 0.25) +
    geom_line(aes(color = Variant)) +
    facet_nested(Variant ~ ReCodedVaccineType + DosesCovidVacc, nest_line = T,
                 labeller = labeller(DosesCovidVacc = function(x){
                     ifelse(grepl(pattern = "1", x = x), "1 dose", paste(x, "doses"))
                 })) +
    coord_cartesian(xlim = c(-5,14), ylim = c(0,1.25)) +
    scale_y_continuous(breaks = seq(0,1.25, by = .25),
                       labels = function(x)sprintf("%g",x)) +
    scale_x_continuous(breaks = c(0,7,14)) +
    xlab("Time since onset of symptoms (days)") +
    ylab("Relative Risk") +
    theme(panel.grid.minor = element_blank(),
          axis.text.y = element_text(hjust = 0),
          legend.position = 'none') +
    scale_fill_manual(values = lineage_cols) +
    scale_color_manual(values = lineage_cols)


ggsave(plot     = pcr_pos_plot_RRs, 
       filename = 'figures/pcr_pos_plot_RRs_variants.pdf', width = 8, height = 4,
       device   = cairo_pdf)

# meh, not super great
{pcr_pos_plot_bayes + pcr_pos_plot_RRs +
        plot_layout(ncol = 1) + 
        plot_annotation(tag_levels = "A")} %>%
    ggsave(plot     = ., 
           filename = 'figures/pcr_pos_plot_pos_RRs_variants.pdf', width = 6, height = 6,
           device   = cairo_pdf)


RRs_summarised %>%
    filter(time_pred == 14, Variant == 'Delta') %>%
    group_by(ReCodedVaccineType, Doses = dose_pred) %>%
    transmute(value = sprintf("%0.2f (%0.2f, %0.2f)", `50%`, `2.5%`, `97.5%`))


# what do the Ct values look like?

pcr_pos_models_data_for_Ct <- 
    pcr_pos_models_data %>%
    mutate(HOCONUMBER = sub(pattern = '(.*)/.*', replacement = '\\1', x = NickID)) %>%
    mutate(HOCONUMBER = trimws(HOCONUMBER)) %>%
    left_join(Variants$Sensitivity)  %>%
    mutate(Variant2 = cut(p_delta, c(0, 0.5, 1), right = T, include.lowest = T,
                          labels = c("Alpha", "Delta")))

ggplot(data = filter(pcr_pos_models_data_for_Ct, DosesCovidVacc != "Zero"),
       aes(y = FLCOEC, x = time)) +
    geom_point(data = filter(pcr_pos_models_data_for_Ct, DosesCovidVacc == "Zero") %>%
                   select(-DosesCovidVacc, -ReCodedVaccineType),
               color = 'grey60',
               alpha = 0.5) +
    geom_point(alpha = 0.5, pch = 1) +
    scale_y_reverse() +
    facet_nested(Variant2 ~ ReCodedVaccineType + DosesCovidVacc, nest_line = T) +
    theme(panel.grid.minor = element_blank()) +
    geom_smooth(method = 'loess')

pcr_pos_models_data_for_Ct %>%
    gather(Assay, value, FLCO8C, FLCOEC) %>%
    group_by(DosesCovidVacc, ReCodedVaccineType, Assay, Variant2) %>%
    nest %>%
    mutate(M = map(.x = data, ~Hmisc::smean.cl.boot(.x$value))) %>%
    unnest_wider(M) %>%
    ggplot(data = ., aes(x = DosesCovidVacc, y = Mean,
                         group = Assay, color = Assay)) +
    geom_pointrange(aes(ymin = Lower, ymax = Upper),
                    position = position_dodge(width = 0.5)) +
    facet_grid(Variant2 ~ ReCodedVaccineType, scales = 'free_x') +
    scale_y_reverse() +
    theme(legend.position = 'bottom', 
          panel.grid.minor = element_blank()) +
    xlab("Doses") +
    ylab("PCR Cycle threshold")
