),
covid_resultado_alt = ifelse(covid_valor < 0.4, "Neg",
ifelse(covid_valor >=0.4 & covid_valor <1.4, "Ind", "Pos")))
# Dates
sero <- sero %>% mutate(min_dt = mdy(min_dt))
# Change location between São Paulo and Manaus to generate plots 2B and 2D
ggplot() +
ggbeeswarm::geom_quasirandom(data = sero %>% filter(location == "Manaus"), # change here - Sao Paulo or Manaus
aes(x = min_dt + mid_adjust, y = covid_valor, color = covid_resultado_alt),
alpha = 0.5, size = 0.05) +
geom_boxplot(data = sero %>% filter(location == "Manaus", # change here - Sao Paulo or Manaus
covid_valor > 0.4),
aes(x = min_dt + mid_adjust, y = covid_valor, group = sample_month),
alpha = 0.3, outlier.size = -1,
width = 3) +
geom_hline(yintercept = 1.4, linetype = 2) +
geom_hline(yintercept = 0.4, linetype = 2) +
scale_color_manual(values = c("orange", "darkblue", "lightpink")) +
scale_y_log10() +
scale_x_date(breaks = "months", date_labels = "%m/%y") +
labs(x = "", y = "Abbott SARS-CoV-2 IgG\nCLIA S/CO (log scale)",
color = "Result at manufactuer's threshold") +
theme_classic() +
theme(legend.position = "none")
# Packages
library(tidyverse)
library(lubridate)
library(ggbeeswarm)
####################################################################################
#################### Main Figure 1 - Assay validation ##############################
# Validation samples for the Abbott assay
# Variable "type" gives description of each group
validation <- read.csv("data/validation_data.csv", stringsAsFactors = F)
# Make the two cutoffs of 1.4 (manufactuer) and 0.4 (reduced threshold)
validation <- validation %>%
mutate(result = ifelse(abbott_sc < 1.4, "Negative", "Positive"),
result_alt = ifelse(abbott_sc >= 1.4, "Positive", ifelse(abbott_sc <1.4 & abbott_sc > 0.4, "Ind", "Negative")))
# Figure 1A
ggplot(validation) +
ggbeeswarm::geom_quasirandom(aes(x = reorder(type, order), y = abbott_sc, color = result_alt),
size = 0.5) +
geom_boxplot(aes(x = reorder(type, order), y = abbott_sc),
alpha = 0.5,
outlier.size = -1,
width = 0.2) +
scale_y_log10() +
scale_color_manual(values = c("orange", "darkblue", "red")) +
geom_hline(yintercept = 1.4, linetype = 2) +
geom_hline(yintercept = 0.4, linetype = 2) +
theme_classic() +
labs(x = "Sample type",
y = "Abbott SARS-CoV-2 IgG CLIA S/C\n(log scale)") +
theme(legend.position = "none")
# Figure 1B
# Cohort of convalescent plasma donors with multiple blood draws
plasma_donors <- read.csv("data/convalescent_plasma_longitudinal.csv", stringsAsFactors = F)
# Figure 1B
ggplot(plasma_donors) +
geom_point(aes(x = days_post_symptoms, y = abbott_sc)) +
geom_line(aes(x = days_post_symptoms, y = abbott_sc, group = donor_id)) +
labs(x = "Days after symptom onset",
y = "Abbott SARS-CoV-2 IgG CIMA S/C") +
theme_classic()
# Figure 1C
baseline <- plasma_donors %>%
filter(seropositive_at_baseline == "yes", followup_visit == "baseline") %>%
select(donor_id, first_sc = abbott_sc, first_timepoint = days_post_symptoms) %>%
ungroup()
follow_up <- plasma_donors %>%
filter(seropositive_at_baseline == "yes", followup_visit == "first follow-up") %>%
select(donor_id, second_sc = abbott_sc, second_timepoint = days_post_symptoms) %>%
ungroup()
# Calculate the decay expressed in log base 2 units per day
gradients <- baseline %>% left_join(follow_up, by = "donor_id")
gradients <- gradients %>%
mutate(diff_time = second_timepoint - first_timepoint,
gradient = log(second_sc, 2) - log(first_sc, 2),
gradient = gradient/diff_time)
ggplot(gradients) +
geom_histogram(aes(x = gradient*100),
color = "black",
fill = "white") +
geom_vline(xintercept = mean(gradients$gradient)*100, linetype = 2, color = "red") +
annotate("text", color = "red", x = -3, y = 9, label = "Mean half-life\n106 days") +
scale_x_continuous(breaks = seq(-4, 3, 1)) +
theme_classic() +
labs(x = "Change in Abbott SARS-CoV-2 IgG\nCIMA S/C (log2 units) per 100 days",
y = "Number of plasma donors")
# Total daily deaths and daily mortality standardized to age-structure of Brazilian population, according to the date of death
stand_deaths <- read.csv("data/stand_deaths.csv", stringsAsFactors = F)
# Prevalence estimates with various corrections calculated with the 1.4 and 0.4 thresholds
prev <- read.csv("data/plot_df_prevalences.csv", stringsAsFactors = F)
# Y-axis limit for mortality and prevalence plots
scaler <- max(stand_deaths$brazil_rate)
# Change the location to plot Sao Paulo or Manaus
prev <- prev %>% filter(location == "Manaus") # Change here
stand_deaths <- stand_deaths %>% filter(location == "Manaus") # Change here
# Convert dates into date format
prev <- prev %>% mutate(min_dt = mdy(min_dt))
stand_deaths <- stand_deaths %>% mutate(DT_EVOLUCA = mdy(DT_EVOLUCA))
# Cumulative deaths
stand_deaths <- stand_deaths %>%
arrange(DT_EVOLUCA) %>%
mutate(cum_deaths = cumsum(total_deaths))
# These scalers are needed to rescale the cumulative deaths curve to the maximum
# corrected prevalence value
scaler1 <- max(stand_deaths$cum_deaths)
scaler2 <- max(prev$prev, na.rm = T)
# To offset/dodge points
prev <- prev %>%
mutate(dodge = case_when(
type == "Age-sex re-weighted (1.4)" ~ -6,
type == "Age-sex & sens/spec adjusted (1.4)" ~ -3,
type == "Age-sex re-weighted (0.4)" ~ 0,
type == "Age-sex & sens/spec adjusted (0.4)" ~ 3,
type == "Seroreversion corrected (1.4)" ~ 6
))
ggplot() +
geom_line(data = stand_deaths, aes(x = DT_EVOLUCA, y = (cum_deaths/scaler1)*scaler2*scaler)) +
geom_col(data = stand_deaths, aes(x = DT_EVOLUCA, y = brazil_rate), alpha = 0.5) +
geom_point(data = prev,
aes(x = min_dt + mid_adjust + dodge, y = prev*scaler, color = type),
size = 2) +
geom_errorbar(data = prev,
aes(x = min_dt+mid_adjust+dodge,
ymin = ci_l*scaler,
ymax = ci_u*scaler,
color = type),
width = 3) +
scale_y_continuous(limits = c(0, scaler), sec.axis = sec_axis(~100*(./scaler), name = "SARS-CoV-2 anitbody prevalance (%)")) +
scale_x_date(breaks = "months", date_labels = "%m/%y", limits = c(dmy("01-02-2020"), dmy("30-10-2020"))) +
theme_classic() +
theme(legend.position = "top",
axis.text.x = element_text(angle = 45, hjust = T)) +
labs(x = "",
y = "Age-sex standardized COVID-19\nmortality per 100,000 inhabitants")
#### Figures 2B and 2D - signal-to-cutoff readings
sero <- read.csv("data/sero_consolidated_weights.csv", stringsAsFactors = F)
sero <- sero %>% select(covid_valor, location, donmo, min_dt, mid_adjust)
sero <- sero %>% mutate(sample_month = case_when(
donmo == 2 ~ "February",
donmo == 3 ~ "March",
donmo == 4 ~ "April",
donmo == 5 ~ "May",
donmo == 6 ~ "June",
donmo == 7 ~ "July",
donmo == 8 ~ "August",
donmo == 9 ~ "September",
donmo == 10 ~ "October"
),
covid_resultado_alt = ifelse(covid_valor < 0.4, "Neg",
ifelse(covid_valor >=0.4 & covid_valor <1.4, "Ind", "Pos")))
#### Figures 2B and 2D - signal-to-cutoff readings
sero <- read.csv("data/sero_consolidated_weights.csv", stringsAsFactors = F)
sero <- sero %>% select(abbott_sc, location, donmo, min_dt, mid_adjust)
sero <- sero %>% mutate(sample_month = case_when(
donmo == 2 ~ "February",
donmo == 3 ~ "March",
donmo == 4 ~ "April",
donmo == 5 ~ "May",
donmo == 6 ~ "June",
donmo == 7 ~ "July",
donmo == 8 ~ "August",
donmo == 9 ~ "September",
donmo == 10 ~ "October"
),
covid_resultado_alt = ifelse(abbott_sc < 0.4, "Neg",
ifelse(abbott_sc >=0.4 & abbott_sc <1.4, "Ind", "Pos")))
# Dates
sero <- sero %>% mutate(min_dt = mdy(min_dt))
# Change location between São Paulo and Manaus to generate plots 2B and 2D
ggplot() +
ggbeeswarm::geom_quasirandom(data = sero %>% filter(location == "Manaus"), # change here - Sao Paulo or Manaus
aes(x = min_dt + mid_adjust, y = abbott_sc, color = covid_resultado_alt),
alpha = 0.5, size = 0.05) +
geom_boxplot(data = sero %>% filter(location == "Manaus", # change here - Sao Paulo or Manaus
abbott_sc > 0.4),
aes(x = min_dt + mid_adjust, y = abbott_sc, group = sample_month),
alpha = 0.3, outlier.size = -1,
width = 3) +
geom_hline(yintercept = 1.4, linetype = 2) +
geom_hline(yintercept = 0.4, linetype = 2) +
scale_color_manual(values = c("orange", "darkblue", "lightpink")) +
scale_y_log10() +
scale_x_date(breaks = "months", date_labels = "%m/%y") +
labs(x = "", y = "Abbott SARS-CoV-2 IgG\nCLIA S/CO (log scale)",
color = "Result at manufactuer's threshold") +
theme_classic() +
theme(legend.position = "none")
#### Figures 2B and 2D - signal-to-cutoff readings
sero <- read.csv("data/sero_consolidated_weights.csv", stringsAsFactors = F)
sero <- sero %>% select(abbott_sc, location, donmo, min_dt, mid_adjust)
sero <- sero %>% mutate(sample_month = case_when(
donmo == 2 ~ "February",
donmo == 3 ~ "March",
donmo == 4 ~ "April",
donmo == 5 ~ "May",
donmo == 6 ~ "June",
donmo == 7 ~ "July",
donmo == 8 ~ "August",
donmo == 9 ~ "September",
donmo == 10 ~ "October"
),
covid_resultado_alt = ifelse(abbott_sc < 0.4, "Neg",
ifelse(abbott_sc >=0.4 & abbott_sc <1.4, "Ind", "Pos")))
# Dates
sero <- sero %>% mutate(min_dt = mdy(min_dt))
# Change location between São Paulo and Manaus to generate plots 2B and 2D
ggplot() +
ggbeeswarm::geom_quasirandom(data = sero %>% filter(location == "Manaus"), # change here - Sao Paulo or Manaus
aes(x = min_dt + mid_adjust, y = abbott_sc, color = covid_resultado_alt),
alpha = 0.5, size = 0.05) +
geom_boxplot(data = sero %>% filter(location == "Manaus", # change here - Sao Paulo or Manaus
abbott_sc > 0.4),
aes(x = min_dt + mid_adjust, y = abbott_sc, group = sample_month),
alpha = 0.3, outlier.size = -1,
width = 3) +
geom_hline(yintercept = 1.4, linetype = 2) +
geom_hline(yintercept = 0.4, linetype = 2) +
scale_color_manual(values = c("orange", "darkblue", "lightpink")) +
scale_y_log10() +
scale_x_date(breaks = "months", date_labels = "%m/%y") +
labs(x = "", y = "Abbott SARS-CoV-2 IgG\nCLIA S/CO (log scale)",
color = "Result at manufactuer's threshold") +
theme_classic() +
theme(legend.position = "none")
###################################################################################
#################### Calculation of infection fatality ratios  ####################
# Demographic data - city level projections
demog <- read.csv("data/demography.csv", stringsAsFactors = F)
# SIVEP-Gripe - downloaded 19-10-2020
srag <- read.csv("data/INFLUD-19-10-2020.csv",
stringsAsFactors = F,
sep = ";")
# Prevalence dataframe
prev <- read.csv("data/plot_df_prevalences.csv", stringsAsFactors = F)
# Pop size by age groups for Manaus
mn <- demog %>% filter(region == "Manaus")
mn <- mn %>% mutate(age_brackets = cut(age_low,
breaks = c(seq(-1, 80, 10), 100),
labels = c(str_c(seq(0, 70, 10), seq(9, 79, 10), sep = " to "), "80+")))
mn <- mn %>%
group_by(age_brackets) %>%
summarise(population = sum(population))
# Seroprevalence in June, prior to significant waning
seroprev1 <- prev$prev[prev$location == "Manaus" & prev$type == "Age-sex & sens/spec adjusted (1.4)" & prev$donmo == 6]
# Seroprevalence in October after adjusting for wanning
seroprev2 <- prev$prev[prev$location == "Manaus" & prev$type == "Seroreversion corrected (1.4)" & prev$donmo == 10]
# Total infections in each age group assuming constant attack rate
mn <- mn %>% mutate(total_infections_june = population*seroprev1,
total_infections_oct = population*seroprev2)
# Deaths by age group Manaus
mn_deaths <- srag %>%
filter(EVOLUCAO == 2,  # Select deaths only
CO_MUN_RES == 130260, # Select residents of Manaus only
SG_UF_NOT == "AM")
mn_deaths <- mn_deaths %>%
mutate(DT_NOTIFIC = dmy(DT_NOTIFIC), # date of notification
DT_EVOLUCA = dmy(DT_EVOLUCA)) # date of death
# Make same age brackets
mn_deaths <- mn_deaths %>%
mutate(age = as.numeric(ifelse(TP_IDADE == 3, NU_IDADE_N, 0)), # When age recorded in months or days must be infant <1yr
age_brackets = cut(age,
c(seq(-1, 80, 10), 99),
include.lowest = T,
labels = c(str_c(seq(0, 70, 10), seq(9, 79, 10), sep = " to "), "80+")))
# Select all deaths due to SARI including confirmed COVID-19
mn_deaths_sari1 <- mn_deaths %>%
filter(CLASSI_FIN %in% c(4, 5) | is.na(CLASSI_FIN)) %>%  # Change here to CLASSI_FIN == 5 to use only confirmed COVID-19 deaths
# This window spans from the first confirmed COVID-19 death up to the end of the sampling window
# in June
filter(DT_EVOLUCA <= dmy("15-06-2020") & DT_EVOLUCA >= ymd("2020-03-27")) %>%
group_by(age_brackets) %>%
tally() %>%
rename(deaths_june = n) %>%
filter(!is.na(age_brackets))
mn_deaths_sari2 <- mn_deaths %>%
filter(CLASSI_FIN %in% c(4, 5) | is.na(CLASSI_FIN)) %>%
# This window spans from the first confirmed COVID-19 death up to the end of the sampling window
# in October
filter(DT_EVOLUCA <= ymd("2020-10-17") & DT_EVOLUCA >= ymd("2020-03-27")) %>%
group_by(age_brackets) %>%
tally() %>%
rename(deaths_oct = n) %>%
filter(!is.na(age_brackets))
mn_deaths_sari <- mn_deaths_sari1 %>% left_join(mn_deaths_sari2)
rm(mn_deaths_sari1, mn_deaths_sari2)
# Put deaths and pop size together
mn <- mn %>% left_join(mn_deaths_sari, by = "age_brackets")
# caluculate IFR point estimate
mn <- mn %>% mutate(ifr_june = (deaths_june/total_infections_june)*100,
ifr_oct =  (deaths_oct/total_infections_oct)*100,
location = "Manaus")
# Global IFRs
sum(mn$deaths_june)/sum(mn$total_infections_june)
#### São Paulo
# Pop size by age groups for São Paulo
sp <- demog %>% filter(region == "Sao Paulo")
sp <- sp %>% mutate(age_brackets = cut(age_low,
breaks = c(seq(-1, 80, 10), 100),
labels = c(str_c(seq(0, 70, 10), seq(9, 79, 10), sep = " to "), "80+")))
sp <- sp %>%
group_by(age_brackets) %>%
summarise(population = sum(population))
# Seroprevalence in June, prior to significant waning
seroprev1 <- prev$prev[prev$location == "Sao Paulo" & prev$type == "Age-sex & sens/spec adjusted (1.4)" & prev$donmo == 6]
# Seroprevalence in October after adjusting for wanning
seroprev2 <- prev$prev[prev$location == "Sao Paulo" & prev$type == "Seroreversion corrected (1.4)" & prev$donmo == 10]
# Total infections in each age group assuming constant attack rate
sp <- sp %>% mutate(total_infections_june = population*seroprev1,
total_infections_oct = population*seroprev2)
# Deaths by age group Manaus
sp_deaths <- srag %>%
filter(EVOLUCAO == 2,  # Select deaths only
CO_MUN_RES == 355030, # Select residents of SP only
SG_UF_NOT == "SP")
sp_deaths <- sp_deaths %>%
mutate(DT_NOTIFIC = dmy(DT_NOTIFIC), # date of notification
DT_EVOLUCA = dmy(DT_EVOLUCA)) # date of death
sp_deaths <- sp_deaths %>%
mutate(age = as.numeric(ifelse(TP_IDADE == 3, NU_IDADE_N, 0)), # When age recorded in months or days must be infant <1yr
age_brackets = cut(age,
c(seq(-1, 80, 10), 99), # Five year age bands, going 0-4, 5-9, 10-14 etc.
include.lowest = T,
labels = c(str_c(seq(0, 70, 10), seq(9, 79, 10), sep = " to "), "80+")))
# Select all deaths due to SARI including confirmed COVID-19
sp_deaths_sari1 <- sp_deaths %>%
filter(CLASSI_FIN %in% c(4, 5) | is.na(CLASSI_FIN)) %>%  # Toggle here to CLASSI_FIN == 5 for only confirmed COVID-19
# This window spans from the first confirmed COVID-19 death up to the end of the sampling window
# in June
filter(DT_EVOLUCA <= ymd("2020-06-20") & DT_EVOLUCA >= ymd("2020-03-13")) %>%
group_by(age_brackets) %>%
tally() %>%
rename(deaths_june = n) %>%
filter(!is.na(age_brackets))
sp_deaths_sari2 <- sp_deaths %>%
filter(CLASSI_FIN %in% c(4, 5) | is.na(CLASSI_FIN)) %>%
# This window spans from the first confirmed COVID-19 death up to the end of the sampling window
# in June
filter(DT_EVOLUCA <= ymd("2020-10-24") & DT_EVOLUCA >= ymd("2020-03-13")) %>%
group_by(age_brackets) %>%
tally() %>%
rename(deaths_oct = n) %>%
filter(!is.na(age_brackets))
sp_deaths_sari <- sp_deaths_sari1 %>% left_join(sp_deaths_sari2)
rm(sp_deaths_sari1, sp_deaths_sari2)
# Put deaths and pop size together
sp <- sp %>% left_join(sp_deaths_sari, by = "age_brackets")
# caluculate IFR point estimate
sp <- sp %>% mutate(ifr_june = (deaths_june/total_infections_june)*100,
ifr_oct =  (deaths_oct/total_infections_oct)*100,
location = "São Paulo")
# Global IFRs
sum(sp$deaths_june)/sum(sp$total_infections_june)
all <- rbind(
sp %>% ungroup %>% select(age_brackets, ifr = ifr_june, location) %>% mutate(type = "June (before seroreversion)"),
sp %>% ungroup %>% select(age_brackets, ifr = ifr_oct, location) %>% mutate(type = "October (seroreversion corrected)"),
mn %>% ungroup %>% select(age_brackets, ifr = ifr_june, location) %>% mutate(type = "June (before seroreversion)"),
mn %>% ungroup %>% select(age_brackets, ifr = ifr_oct,  location) %>% mutate(type = "October (seroreversion corrected)")
)
verity <- read.csv("data/verity_ifr.csv")
verity1 <- verity %>% select(age_brackets, ifr) %>% mutate(location = "Mainland China", type = "June (before seroreversion)")
verity2 <- verity %>% select(age_brackets, ifr) %>% mutate(location = "Mainland China", type = "October (seroreversion corrected)")
all <- all %>% rbind(verity1, verity2)
ggplot(all) +
geom_col(aes(x = age_brackets, y = ifr, fill = location), color = "black", position = "dodge") +
labs(fill = "", x = "Age groups (years)", y = "Age specific infection\nfatality ratio (%)") +
theme_classic() +
facet_wrap(~ type) +
theme(axis.text.x = element_text(angle = 45, hjust = T),
legend.position = "top")
# Total daily deaths and daily mortality standardized to age-structure of Brazilian population, according to the date of death
stand_deaths <- read.csv("data/stand_deaths.csv", stringsAsFactors = F)
# Prevalence estimates with various corrections calculated with the 1.4 and 0.4 thresholds
prev <- read.csv("data/plot_df_prevalences.csv", stringsAsFactors = F)
# Y-axis limit for mortality and prevalence plots
scaler <- max(stand_deaths$brazil_rate)
#### Figures 2B and 2D - signal-to-cutoff readings
sero <- read.csv("data/sero_consolidated_weights.csv", stringsAsFactors = F)
sero <- sero %>% select(abbott_sc, location, donmo, min_dt, mid_adjust)
sero <- sero %>% mutate(sample_month = case_when(
donmo == 2 ~ "February",
donmo == 3 ~ "March",
donmo == 4 ~ "April",
donmo == 5 ~ "May",
donmo == 6 ~ "June",
donmo == 7 ~ "July",
donmo == 8 ~ "August",
donmo == 9 ~ "September",
donmo == 10 ~ "October"
),
covid_resultado_alt = ifelse(abbott_sc < 0.4, "Neg",
ifelse(abbott_sc >=0.4 & abbott_sc <1.4, "Ind", "Pos")))
# Dates
sero <- sero %>% mutate(min_dt = mdy(min_dt))
# Change location between São Paulo and Manaus to generate plots 2B and 2D
ggplot() +
ggbeeswarm::geom_quasirandom(data = sero %>% filter(location == "Manaus"), # change here - Sao Paulo or Manaus
aes(x = min_dt + mid_adjust, y = abbott_sc, color = covid_resultado_alt),
alpha = 0.5, size = 0.05) +
geom_boxplot(data = sero %>% filter(location == "Manaus", # change here - Sao Paulo or Manaus
abbott_sc > 0.4),
aes(x = min_dt + mid_adjust, y = abbott_sc, group = sample_month),
alpha = 0.3, outlier.size = -1,
width = 3) +
geom_hline(yintercept = 1.4, linetype = 2) +
geom_hline(yintercept = 0.4, linetype = 2) +
scale_color_manual(values = c("orange", "darkblue", "lightpink")) +
scale_y_log10() +
scale_x_date(breaks = "months", date_labels = "%m/%y") +
labs(x = "", y = "Abbott SARS-CoV-2 IgG\nCLIA S/CO (log scale)",
color = "Result at manufactuer's threshold") +
theme_classic() +
theme(legend.position = "none")
library(dplyr)
library(ggplot2)
library(survey)
library(lubridate)
library(tidyverse)
prev_df <- read.csv("data/sero_consolidated_weights.csv", stringsAsFactors = F)
prev_df <- prev_df %>% mutate(covid_resultado = ifelse(abbott_result == "Positive", 1, 0))
se = 0.84
sp = 0.999
# Make weighted and unweighted survey object
un_weighted_df <- svydesign(id = ~1, weights = ~1, data = prev_df)
weighted_df <- survey::svydesign(id = ~1, weights = ~weights, data = prev_df)
# Calculate prevlanece without weights
prev_unweighted <- svyby(~covid_resultado,
by = ~location + donmo,
design = un_weighted_df,
FUN = svyciprop, method = "be", vartype = c("ci", "var")) %>% rename("var_unweighted" =  "var.as.numeric(covid_resultado)")
# Calculate prevalence with weights
prev_weighted <- svyby(~covid_resultado,
by = ~location + donmo,
design = weighted_df,
FUN = svyciprop, method = "be", vartype = c("ci", "var")) %>% rename("var_weighted" =  "var.as.numeric(covid_resultado)")
prevalence <- prev_unweighted %>%
rename(prev = covid_resultado) %>%
left_join(prev_weighted %>%
rename(
prev_weighted = covid_resultado,
ci_l_weighted = ci_l,
ci_u_weighted = ci_u
))
# Sens/specificity correction (method from https://doi.org/10.1155/2011/608719)
adjust_SensSpec <- function(p) {
adj <- (p + sp - 1) / (se + sp - 1)
adj <- ifelse(adj < 0, 0, adj)
return(adj)
}
prevalence <- prevalence %>%
mutate(prev_weighted_adj = adjust_SensSpec(prev_weighted),
ci_l_weighted_adj = adjust_SensSpec(ci_l_weighted),
ci_u_weighted_adj = adjust_SensSpec(ci_u_weighted))
prevalence <- prevalence %>%
mutate(prev_unweighted_adj = adjust_SensSpec(prev),
ci_l_unweighted_adj = adjust_SensSpec(ci_l),
ci_u_unweighted_adj = adjust_SensSpec(ci_u))
prevalence = prev_df %>%  group_by(donmo, location) %>% summarize(n = n()) %>% merge(prevalence, by = c("donmo", "location"))
prev_df <- read.csv("data/sero_consolidated_weights.csv", stringsAsFactors = F)
prev_df <- prev_df %>% mutate(covid_resultado = ifelse(abbott_result_alt == "Positive", 1, 0))
se = 0.925      #sensitivity
sp = 0.967      #specificity
# Make weighted and unweighted survey object
un_weighted_df <- svydesign(id = ~1, weights = ~1, data = prev_df)
weighted_df <- survey::svydesign(id = ~1, weights = ~weights, data = prev_df)
# Calculate prevlanece without weights
prev_unweighted <- svyby(~covid_resultado,
by = ~location + donmo,
design = un_weighted_df,
FUN = svyciprop, method = "be", vartype = c("ci", "var")) %>% rename("var_unweighted" =  "var.as.numeric(covid_resultado)")
# Calculate prevalence with weights
prev_weighted <- svyby(~covid_resultado,
by = ~location + donmo,
design = weighted_df,
FUN = svyciprop, method = "be", vartype = c("ci", "var")) %>% rename("var_weighted" =  "var.as.numeric(covid_resultado)")
prevalence <- prev_unweighted %>%
rename(prev = covid_resultado) %>%
left_join(prev_weighted %>%
rename(
prev_weighted = covid_resultado,
ci_l_weighted = ci_l,
ci_u_weighted = ci_u
))
# Sens/specificity correction (method from https://doi.org/10.1155/2011/608719)
adjust_SensSpec <- function(p) {
adj <- (p + sp - 1) / (se + sp - 1)
adj <- ifelse(adj < 0, 0, adj)
return(adj)
}
prevalence <- prevalence %>%
mutate(prev_weighted_adj = adjust_SensSpec(prev_weighted),
ci_l_weighted_adj = adjust_SensSpec(ci_l_weighted),
ci_u_weighted_adj = adjust_SensSpec(ci_u_weighted))
prevalence <- prevalence %>%
mutate(prev_unweighted_adj = adjust_SensSpec(prev),
ci_l_unweighted_adj = adjust_SensSpec(ci_l),
ci_u_unweighted_adj = adjust_SensSpec(ci_u))
prevalence = prev_df %>%  group_by(donmo, location) %>% summarize(n = n()) %>% merge(prevalence, by = c("donmo", "location"))
source("prevalence_functions.R")
# --- Get Prevalence Data --- #
Ns = 10000  #Number of bootstrap iterations
adjustspec = TRUE
smallthresh = FALSE
agesexn = TRUE
posc0 = correctedPrevalence(smallthresh, adjustspec, agesexn)
pop_MN = 1.793E6
pop_SP = 12.18E6
posc0 = posc0 %>% dplyr::filter(donmo >= 3 & donmo <= 10) %>% arrange(donmo) #Ignore cases before march
outputs = c("alphamin_MN", "alphamin_SP", "U_MN_MN", "U_SP_MN",  "pmin_SP", "pmin_MN")
for(x in outputs)
assign(paste0(x,"_s"), vector("list", Ns+1))
