bootstrap_OR.Rd
Bootstrapping for MAIC weighted odds ratios
bootstrap_OR(
  intervention_data,
  matching,
  i,
  model,
  comparator_data,
  min_weight = 1e-04
)
| Argument | Description |
|---|---|
| intervention_data | A data frame containing individual patient data from the intervention study. |
| matching | A character vector giving the names of the covariates to use in matching. These names must match the column names in intervention_data. |
| i | Index used to select a sample within the function (supplied automatically by boot). |
| model | A model formula in the form 'endpoint ~ treatment_var'. Variable names need to match the corresponding columns in intervention_data. |
| comparator_data | A data frame containing pseudo individual patient data from the comparator study needed to derive the relative treatment effect. The outcome variable names must match intervention_data. |
| min_weight | A numeric value that defines the minimum weight allowed. This value (default 0.0001) will replace weights estimated at 0 in a sample. |
The OR as a numeric value.
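Because boot() supplies the row index i automatically, the function can also be called directly to obtain the point estimate on the full (unresampled) intervention sample. The following is a minimal sketch, not part of the package examples: it assumes est_weights (shipped with MAIC) has been loaded and that comparator_input has been prepared as in the worked example further below.

library(MAIC)

# Example intervention data with weights (shipped with the package)
data(est_weights, package = "MAIC")

# Point estimate of the weighted OR: passing all row indices means no resampling
# ('comparator_input' is the comparator pseudo data built in the worked example below)
OR_full_sample <- bootstrap_OR(
  intervention_data = est_weights$analysis_data,
  matching          = est_weights$matching_vars,
  i                 = seq_len(nrow(est_weights$analysis_data)), # full sample
  model             = 'response ~ ARM',
  comparator_data   = comparator_input,
  min_weight        = 1e-04
)
OR_full_sample # a single numeric value: the weighted OR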
This function is intended to be used in conjunction with the boot function and returns the statistic to be bootstrapped: it performs MAIC weighting using estimate_weights and returns a weighted odds ratio (OR) from a logistic regression model. It is supplied as the 'statistic' argument to the boot function.
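In outline, that call pattern looks like the condensed sketch below; object names such as comparator_input are taken from the full worked example that follows.

library(boot)
library(MAIC)

data(est_weights, package = "MAIC")

# bootstrap_OR is passed as the 'statistic' argument; boot() supplies the index i
OR_bootstraps <- boot(data = est_weights$analysis_data,     # intervention IPD
                      statistic = bootstrap_OR,             # statistic to bootstrap
                      R = 1000,                             # number of bootstrap samples
                      comparator_data = comparator_input,   # comparator pseudo IPD
                      matching = est_weights$matching_vars, # matching covariates
                      model = 'response ~ ARM')             # logistic model formula

# 95% percentile CI for the weighted OR
boot.ci(boot.out = OR_bootstraps, index = 1, type = "perc")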
# This example code combines weighted individual patient data for 'intervention'
# and pseudo individual patient data for 'comparator' and performs analyses for
# two endpoints: overall survival (a time to event outcome) and objective
# response (a binary outcome)

library(dplyr)
library(boot)
library(survival)
library(MAIC)
library(ggplot2)
library(survminer)
library(flextable)
library(officer)

# Load intervention data with weights saved in est_weights
data(est_weights, package = "MAIC")

# Combine data ------------------------------------------------------------------
# Combine the comparator pseudo data with the analysis data outputted from
# the estimate_weights function

# Read in digitised pseudo survival data for the comparator
comparator_surv <- read.csv(system.file("extdata", "psuedo_IPD.csv",
                                        package = "MAIC", mustWork = TRUE)) %>%
  rename(Time = Time, Event = Event)

# Simulate response data based on the known proportion of responders
comparator_n <- nrow(comparator_surv) # total number of patients in the comparator data
comparator_prop_events <- 0.4         # proportion of responders

# Calculate the number of patients with an event
# Use round() to ensure we end up with a whole number of people
# Number without an event = total N - number with an event, to ensure we keep
# the same number of patients
n_with_event <- round(comparator_n * comparator_prop_events, digits = 0)
comparator_binary <- data.frame("response" = c(rep(1, n_with_event),
                                               rep(0, comparator_n - n_with_event)))

# Join survival and response comparator data
# Note the rows do not represent observations from a particular patient,
# i.e. there is no relationship between the survival time and response status
# for a given row, since this is simulated data
# In a real data set this relationship would be present
comparator_input <- cbind(comparator_surv, comparator_binary) %>%
  mutate(wt = 1, wt_rs = 1, ARM = "Comparator") # All patients have weight = 1

head(comparator_input)
#>         Time Event response wt wt_rs        ARM
#> 1 20.2311676     1        1  1     1 Comparator
#> 2 28.7679537     1        1  1     1 Comparator
#> 3 41.0662129     0        1  1     1 Comparator
#> 4  0.8492261     1        1  1     1 Comparator
#> 5  9.0521882     0        1  1     1 Comparator
#> 6  3.4450075     1        1  1     1 Comparator

# Join comparator data with the intervention data (naming of variables should be
# consistent between both datasets)
# Set factor levels to ensure "Comparator" is the reference treatment
combined_data <- bind_rows(est_weights$analysis_data, comparator_input)
combined_data$ARM <- relevel(as.factor(combined_data$ARM), ref = "Comparator")

#### Estimating the relative effect ---------------------------------------------

### Example for survival data ---------------------------------------------------

## Kaplan-Meier plot

# Unweighted intervention data
KM_int <- survfit(formula = Surv(Time, Event == 1) ~ 1,
                  data = est_weights$analysis_data,
                  type = "kaplan-meier")

# Weighted intervention data
KM_int_wtd <- survfit(formula = Surv(Time, Event == 1) ~ 1,
                      data = est_weights$analysis_data,
                      weights = wt,
                      type = "kaplan-meier")

# Comparator data
KM_comp <- survfit(formula = Surv(Time, Event == 1) ~ 1,
                   data = comparator_input,
                   type = "kaplan-meier")

# Combine the survfit objects ready for ggsurvplot
KM_list <- list(Intervention = KM_int,
                Intervention_weighted = KM_int_wtd,
                Comparator = KM_comp)

# Produce the Kaplan-Meier plot
KM_plot <- ggsurvplot(KM_list,
                      combine = TRUE,
                      risk.table = TRUE, # numbers at risk displayed on the plot
                      break.x.by = 50,
                      xlab = "Time (days)",
                      censor = FALSE,
                      legend.title = "Treatment",
                      title = "Kaplan-Meier plot of overall survival",
                      legend.labs = c("Intervention",
                                      "Intervention weighted",
                                      "Comparator"),
                      font.legend = list(size = 10)) +
  guides(colour = guide_legend(nrow = 2))
#> Warning: Vectorized input to `element_text()` is not officially supported.
#> Results may be unexpected or may change in future versions of ggplot2.

## Estimating the hazard ratio (HR)

# Fit a Cox model without weights to estimate the unweighted HR
unweighted_cox <- coxph(Surv(Time, Event == 1) ~ ARM, data = combined_data)

HR_CI_cox <- summary(unweighted_cox)$conf.int %>%
  as.data.frame() %>%
  transmute("HR" = `exp(coef)`,
            "HR_low_CI" = `lower .95`,
            "HR_upp_CI" = `upper .95`)

# Fit a Cox model with weights to estimate the weighted HR
weighted_cox <- coxph(Surv(Time, Event == 1) ~ ARM,
                      data = combined_data,
                      weights = wt)

HR_CI_cox_wtd <- summary(weighted_cox)$conf.int %>%
  as.data.frame() %>%
  transmute("HR" = `exp(coef)`,
            "HR_low_CI" = `lower .95`,
            "HR_upp_CI" = `upper .95`)

## Bootstrap the confidence interval of the weighted HR
HR_bootstraps <- boot(data = est_weights$analysis_data,     # intervention data
                      statistic = bootstrap_HR,             # bootstrap the HR (defined in the MAIC package)
                      R = 1000,                             # number of bootstrap samples
                      comparator_data = comparator_input,   # comparator pseudo data
                      matching = est_weights$matching_vars, # matching variables
                      model = Surv(Time, Event == 1) ~ ARM  # model to fit
                      )

## Bootstrapping diagnostics
# Summarize bootstrap estimates in a histogram
# Vertical lines indicate the median and upper and lower CIs
hist(HR_bootstraps$t, main = "", xlab = "Bootstrapped HR")

# Median of the bootstrap samples
HR_median <- median(HR_bootstraps$t)

# Bootstrap CI - percentile CI
boot_ci_HR <- boot.ci(boot.out = HR_bootstraps, index = 1, type = "perc")

# Bootstrap CI - BCa CI
boot_ci_HR_BCA <- boot.ci(boot.out = HR_bootstraps, index = 1, type = "bca")

## Summary
# Produce a summary of HRs and CIs
HR_summ <- rbind(HR_CI_cox, HR_CI_cox_wtd) %>% # unweighted and weighted HRs and CIs from Cox models
  mutate(Method = c("HR (95% CI) from unadjusted Cox model",
                    "HR (95% CI) from weighted Cox model")) %>%
  # Median bootstrapped HR and 95% percentile CI
  rbind(data.frame("HR" = HR_median,
                   "HR_low_CI" = boot_ci_HR$percent[4],
                   "HR_upp_CI" = boot_ci_HR$percent[5],
                   "Method" = "Bootstrap median HR (95% percentile CI)")) %>%
  # Median bootstrapped HR and 95% bias-corrected and accelerated bootstrap CI
  rbind(data.frame("HR" = HR_median,
                   "HR_low_CI" = boot_ci_HR_BCA$bca[4],
                   "HR_upp_CI" = boot_ci_HR_BCA$bca[5],
                   "Method" = "Bootstrap median HR (95% BCa CI)")) %>%
  # Apply rounding for numeric columns
  mutate_if(.predicate = is.numeric, sprintf, fmt = "%.3f") %>%
  # Format for output
  transmute(Method, HR_95_CI = paste0(HR, " (", HR_low_CI, " to ", HR_upp_CI, ")"))

# Summarize the results in a table suitable for Word/PowerPoint
HR_table <- HR_summ %>%
  regulartable() %>% # make it a flextable object
  set_header_labels(Method = "Method", HR_95_CI = "Hazard ratio (95% CI)") %>%
  font(font = 'Arial', part = 'all') %>%
  fontsize(size = 14, part = 'all') %>%
  bold(part = 'header') %>%
  align(align = 'center', part = 'all') %>%
  align(j = 1, align = 'left', part = 'all') %>%
  border_outer(border = fp_border()) %>%
  border_inner_h(border = fp_border()) %>%
  border_inner_v(border = fp_border()) %>%
  autofit(add_w = 0.2, add_h = 2)

### Example for response data ---------------------------------------------------

## Estimating the odds ratio (OR)

# Fit a logistic regression model without weights to estimate the unweighted OR
unweighted_OR <- glm(formula = response ~ ARM,
                     family = binomial(link = "logit"),
                     data = combined_data)

# Log odds ratio
log_OR_CI_logit <- cbind(coef(unweighted_OR),
                         confint.default(unweighted_OR, level = 0.95))[2,]

# Exponentiate to get the odds ratio
OR_CI_logit <- exp(log_OR_CI_logit)

# Tidy up naming
names(OR_CI_logit) <- c("OR", "OR_low_CI", "OR_upp_CI")

# Fit a logistic regression model with weights to estimate the weighted OR
weighted_OR <- suppressWarnings(glm(formula = response ~ ARM,
                                    family = binomial(link = "logit"),
                                    data = combined_data,
                                    weights = wt))

# Weighted log odds ratio
log_OR_CI_logit_wtd <- cbind(coef(weighted_OR),
                             confint.default(weighted_OR, level = 0.95))[2,]

# Exponentiate to get the weighted odds ratio
OR_CI_logit_wtd <- exp(log_OR_CI_logit_wtd)

# Tidy up naming
names(OR_CI_logit_wtd) <- c("OR", "OR_low_CI", "OR_upp_CI")

## Bootstrap the confidence interval of the weighted OR
OR_bootstraps <- boot(data = est_weights$analysis_data,     # intervention data
                      statistic = bootstrap_OR,             # bootstrap the OR
                      R = 1000,                             # number of bootstrap samples
                      comparator_data = comparator_input,   # comparator pseudo data
                      matching = est_weights$matching_vars, # matching variables
                      model = 'response ~ ARM'              # model to fit
                      )

## Bootstrapping diagnostics
# Summarize bootstrap estimates in a histogram
# Vertical lines indicate the median and upper and lower CIs
hist(OR_bootstraps$t, main = "", xlab = "Bootstrapped OR")

# Median of the bootstrap samples
OR_median <- median(OR_bootstraps$t)

# Bootstrap CI - percentile CI
boot_ci_OR <- boot.ci(boot.out = OR_bootstraps, index = 1, type = "perc")

# Bootstrap CI - BCa CI
boot_ci_OR_BCA <- boot.ci(boot.out = OR_bootstraps, index = 1, type = "bca")

## Summary
# Produce a summary of ORs and CIs
OR_summ <- rbind(OR_CI_logit, OR_CI_logit_wtd) %>% # unweighted and weighted ORs and CIs
  as.data.frame() %>%
  mutate(Method = c("OR (95% CI) from unadjusted logistic regression model",
                    "OR (95% CI) from weighted logistic regression model")) %>%
  # Median bootstrapped OR and 95% percentile CI
  rbind(data.frame("OR" = OR_median,
                   "OR_low_CI" = boot_ci_OR$percent[4],
                   "OR_upp_CI" = boot_ci_OR$percent[5],
                   "Method" = "Bootstrap median OR (95% percentile CI)")) %>%
  # Median bootstrapped OR and 95% bias-corrected and accelerated bootstrap CI
  rbind(data.frame("OR" = OR_median,
                   "OR_low_CI" = boot_ci_OR_BCA$bca[4],
                   "OR_upp_CI" = boot_ci_OR_BCA$bca[5],
                   "Method" = "Bootstrap median OR (95% BCa CI)")) %>%
  # Apply rounding for numeric columns
  mutate_if(.predicate = is.numeric, sprintf, fmt = "%.3f") %>%
  # Format for output
  transmute(Method, OR_95_CI = paste0(OR, " (", OR_low_CI, " to ", OR_upp_CI, ")"))

# Summarize the results in a table suitable for Word/PowerPoint
OR_table <- OR_summ %>%
  regulartable() %>% # make it a flextable object
  set_header_labels(Method = "Method", OR_95_CI = "Odds ratio (95% CI)") %>%
  font(font = 'Arial', part = 'all') %>%
  fontsize(size = 14, part = 'all') %>%
  bold(part = 'header') %>%
  align(align = 'center', part = 'all') %>%
  align(j = 1, align = 'left', part = 'all') %>%
  border_outer(border = fp_border()) %>%
  border_inner_h(border = fp_border()) %>%
  border_inner_v(border = fp_border()) %>%
  autofit(add_w = 0.2)