# Summarise the sequencing data: how many people were enrolled, tested positive, and were sequenced.
# Get the proportion of sequenced transmission events (case + contact pairs) out of all potential transmission events (case + contact pairs).

# Sequencing metadata: the September data with the earlier swabs joined on.
meta <- read.csv(file = "phylo/REVISEDSEQUENCINGOutput_JH.csv", header = TRUE)

# Derive per-row flags and tidy the identifier:
#   HocoSwab.everpos - "YES" if any of the three swabs is "POSITIVE", otherwise "NO"
#                      (rows with no positive swab, including missing results, are "NO")
#   seq.status       - "YES" when the row has an ERR value, NA otherwise
#   NickID           - all spaces removed, just in case any crept into the IDs
meta <- meta %>%
    mutate(
        HocoSwab.everpos = case_when(
            Hocoswab1 == "POSITIVE" |
                Hocoswab2 == "POSITIVE" |
                Hocoswab3 == "POSITIVE" ~ "YES",
            TRUE                        ~ "NO"),
        # ERR is text, so test for a non-empty value explicitly. `ERR > 1` only
        # worked by coercing 1 to "1" and comparing strings in the session's
        # collation order.
        seq.status = case_when(!is.na(ERR) & ERR != "" ~ "YES"),
        NickID = gsub(pattern = " ", replacement = "", NickID))

## Per-individual flags, kept only for people who were ever swab-positive:
##   valid.seq - 1 if any of the person's rows has a non-empty ERR, 0 if none does
##               (NA if an ERR value is NA and no comparison can be made)
##   Covid     - 1 if any of the person's rows has HocoSwab.everpos == "YES"

meta_valid.seq <- meta %>%
    mutate(has.err  = ERR != "",
           ever.pos = HocoSwab.everpos == "YES") %>%
    group_by(NickID) %>%
    # max() of a logical vector gives 1 when any value is TRUE, otherwise 0
    summarise(valid.seq = max(has.err),
              Covid     = max(ever.pos)) %>%
    replace_na(list(Covid = 0)) %>%
    filter(Covid > 0)

## Enumerate every (contact, case) pairing within each HOCONUMBER group.
## Result: one row per pairing, with columns HOCONUMBER, NickID_CONTACT, NickID_CASE.

meta_relationships <- meta %>%
    distinct(NickID, HOCONUMBER, STATUS) %>%
    group_by(HOCONUMBER) %>%
    group_split() %>%
    map_df(function(grp) {
        # Use filter() rather than split(grp, grp$STATUS)$CASE: a group with no
        # CASE (or no CONTACT) rows then gives a zero-row table and contributes
        # no pairs, instead of a NULL that makes select() fail. It also avoids
        # `$` partial matching on the list names.
        contacts <- filter(grp, STATUS == "CONTACT")
        cases    <- filter(grp, STATUS == "CASE")
        inner_join(select(contacts, HOCONUMBER, NickID_CONTACT = NickID),
                   select(cases,    HOCONUMBER, NickID_CASE    = NickID),
                   by = 'HOCONUMBER')
    })

# Cross-tabulate the case/contact pairs by whether each member has a valid
# sequenced swab, and print the result as a simple table (rows = case status,
# columns = contact status).
meta_relationships %>%
    # does the contact have a valid sequenced swab? (lookup columns get a _CONTACT suffix)
    left_join(setNames(meta_valid.seq,
                       paste0(names(meta_valid.seq), "_CONTACT")),
              by = 'NickID_CONTACT') %>%
    # does the case have a valid sequenced swab? (lookup columns get a _CASE suffix)
    left_join(setNames(meta_valid.seq,
                       paste0(names(meta_valid.seq), "_CASE")),
              by = 'NickID_CASE') %>%
    count(valid.seq_CONTACT, valid.seq_CASE) %>%
    # drop pairs where the contact has no valid.seq value after the join
    filter(!is.na(valid.seq_CONTACT)) %>%
    rename(CONTACT = valid.seq_CONTACT,
           CASE    = valid.seq_CASE) %>%
    mutate_at(.vars = vars(CONTACT, CASE),
              .funs = function(flag) {
                  label <- case_when(flag == 0   ~ "Unsequenced",
                                     flag == 1   ~ "Sequenced",
                                     is.na(flag) ~ "Missing",
                                     TRUE        ~ "Unknown")
                  # "Unknown" is not among the levels, so it would become NA here
                  factor(label, levels = c("Unsequenced",
                                           "Sequenced",
                                           "Missing"))
              }) %>%
    # one column per contact status; combinations that never occur are counted as 0
    spread(key = CONTACT, value = n, fill = 0) %>%
    mutate(Case = paste("Case", CASE)) %>%
    select(Case, everything(), -CASE) %>%
    rename_at(.vars = vars(-Case),
              .funs = function(nm) paste("Contact", nm)) %>%
    kable(format = 'simple')
