library(tidyverse)
library(tidyverse)
library(cowplot)
library(here)
theme_set(theme_cowplot())
library(patchwork)
appender <- function(string, suffix = " Years Old") paste0(string, suffix)

sigfig <- function(vec, digits=2){
  stringr::str_trim(formatC( round( vec, digits ), format='f', digits=digits ))
}

dat_1a <- data.table::fread(here("data","data-figure-2-a.csv"))


# Figure 2a --------------------------------
fig2a  <- dat_1a |>
mutate(SystemNM = factor(SystemNM, c("Respiratory", 
    "Gastrointestinal", 
    "Systemic"))) |>
mutate(AgeDSC = factor(AgeDSC, c("2-4 Years", "5-11 Years", "12-17 Years"))) |>
ggplot(aes(reorder(SymptomDSC,desc(SymptomDSC )), 
    y = point, ,ymin = low, ymax = high))+
geom_pointrange()+
facet_grid(rows = vars(SystemNM), cols = vars(AgeDSC), scales = "free_y")+
coord_flip()+
geom_hline(yintercept = 0, lty = "dashed")+
scale_y_continuous(
    name = "Difference in Reporting Rates in Seropositive compared to Seronegative (%)", 
    labels = scales::percent)+
labs(x = NULL)

# Figure 2b------------------------------------------

dat_1b <- data.table::fread(here("data", "data-figure-2-b.csv"))

fig_2_b <- dat_1b %>%
  select(AgeDSC, SxStatusDSC) %>%
  mutate(AgeDSC = factor(AgeDSC, c("12-17", "5-11", "2-4"))) |>
  count(AgeDSC, SxStatusDSC) |>
  group_by(AgeDSC) |>
  mutate(prop = n/sum(n)) |>
  filter(SxStatusDSC == "Symptomatic") |>
  ggplot(aes(AgeDSC, prop))+
    geom_point(size = 2.5)+
    geom_segment(aes( y= 0, xend = AgeDSC, yend = prop))+
    cowplot::theme_cowplot()+
    coord_flip()+
    scale_y_continuous(name = "Symptomatic, Positive (%)",limits = c(0,1), labels = scales::percent, expand = c(0,.05))+
    labs(
      x = NULL
    )


# Figure 3c------------------------------------------

dat_1c <- data.table::fread(here("data", "data-figure-2-c.csv"))

rez = dat_1c |>
group_by(sx) |>
nest() |>
mutate(mod = map(data, ~{
    glm(CovidFLG ~ EverFLG, data = .x, family = binomial()) |>
    broom::tidy() |>
    filter(term == "EverFLG") |>
    mutate(OR = exp(estimate),
           OR_lo = exp(estimate - 1.96 * std.error),
           OR_hi = exp(estimate + 1.96 * std.error)) 
})) |>
mutate(total = map(data, ~sum(.x$CovidFLG))) |>
mutate(events = map(data, ~sum(.x$EverFLG))) |>
unnest(cols = c(mod, total,events)) |>
filter(!is.na(estimate)) |>
select(sx, total,events, OR, OR_lo, OR_hi, p.value) |>
mutate(SxDSC = case_when(sx == "GIFLG"~ "Gastrointestinal",
                         sx == "SystemicFLG" ~ "Systemic",
                         TRUE ~ "Respiratory"))

fig2c <- rez |>
mutate(use_label = sprintf("%s (95%%CI %s, %s)\np-value %s",sigfig(OR), sigfig(OR_lo), sigfig(OR_hi), scales::pvalue(p.value) )) |>
mutate(SxDSC = factor(SxDSC, rev(c("Respiratory", "Gastrointestinal", "Systemic")))) |>
ggplot(aes(SxDSC, OR))+
geom_pointrange(aes(ymin = OR_lo, ymax = OR_hi))+
coord_flip()+
theme_classic(base_size = 12)+
scale_y_continuous(name ="Odds Ratio (Infection Induced Antibodies)",limits = c(0,NA), expand = c(0,0))+
geom_hline(yintercept = 1 , lty = 2)+
labs(x = NULL)
