# fix factors when re-importing from csv
rfmain <- read_csv('reframed-bjpsych.csv') %>% 
  mutate_at(vars(month, grp, site, patient, therapist), funs(factor))

Standardise some predictors:

rfmain.z <- rfmain %>% 
  MuMIn::stdize(
    omit.cols=c("site", "patient", "therapist", "month", "grp",
                "hrsd", "eac", "aaq", "phq9", "mssi"), 
    prefix=F) %>% 
  mutate_at(vars(patient, therapist), funs(factor)) %>% 
  as.tibble()

Primary outcomes

Model specification

makemodelformula <- function(dv){
  as.formula(glue::glue("{dv} ~
                   (grp * month * pd.b) +
                   (grp * month * site) +
                   (grp * month * {dv}.b) +
                   (grp * month * rdb_age_depression_onset) +
                   (1|patient) + (1|therapist)"))
}

An example:

makemodelformula("hrsd")
## hrsd ~ (grp * month * pd.b) + (grp * month * site) + (grp * month * 
##     hrsd.b) + (grp * month * rdb_age_depression_onset) + (1 | 
##     patient) + (1 | therapist)
## <environment: 0x563ac6619468>

Outcome models

Compute raw means, run a model, and compute group means and planned contrasts for each outcome. Save the results, including Anova terms, in a single large table: outcomes.

CONT.OUTCOMES <-  c("hrsd", "aaq", "phq9", "eac", "mssi", "ssq")
outcomes <- tibble(dv = CONT.OUTCOMES) %>% 
  group_by(dv) %>% 
  do(., 
     {
       f_ <- function(dv){
         #  dv <- "hrsd"
         frm <- makemodelformula(dv)
         
         m.rawmeans <- rfmain.z %>% 
           group_by(month, grp) %>% 
           do(., tibble(estimate=mean(pull(., dv), na.rm=T),
                        SD=sd(pull(., dv), na.rm=T),
                        N = sum(!is.na(pull(., dv))))) %>% 
           mutate(Type="Raw group means", term = glue::glue("{grp}, month {month}")) %>% 
           select(Type, term, grp, month,  estimate, SD, N)
         
         options(contrasts = c("contr.sum", "contr.poly"))
         m <- lmer(frm, data=rfmain.z)
        
         m.rand <- ranova(m) %>% 
           tidy() %>%
           mutate(term = ifelse(term=="<none>", "Residual", term)) %>% 
           rename(statistic = LRT) %>% select(term, statistic, df) %>% 
           mutate(Type="Anova variance terms")
         
         m.aov <- anova(m, ddf="Kenward-Roger") %>% tidy() %>% 
           mutate(Type="Anova fixed effects")
         
         m.diffs <- contrast(emmeans(m, specs=~grp|month, lmer.df = "kenward-roger"),
                             method="pairwise", adjust="none") 
         
         m.diffs.ci <- m.diffs %>% confint() %>% as.tibble() %>% 
           rename(conf.low=lower.CL, conf.high=upper.CL) %>% 
           select(month, contains("conf"))
         
         m.contrasts <- left_join(m.diffs %>% tidy(), 
                                  m.diffs.ci, by="month") %>% 
           mutate(term = glue::glue("{level1} - {level2} at month {month}")) %>% 
           select(term, month, estimate, df, statistic, p.value, contains("conf")) %>% 
           mutate(Type="Planned contrasts")
         
         m.means <-  emmeans(m, specs=~grp|month, lmer.df = "kenward-roger") %>% 
           tidy() %>% 
           mutate(term = glue::glue("{grp}, month {month}")) %>% 
           filter(month!=0) %>% 
           select(-df) %>% 
           mutate(Type="Estimated group means") 
         
         return(bind_rows(m.rawmeans, m.means, m.contrasts, m.rand, m.aov) %>% 
           select(Type, term, everything()))
         }
       f_(.$dv)
     }
  )
# save for later
saveRDS(outcomes, "outcomes.RDS")

Present all outcomes and results in a single table:

outcomes <- readRDS('outcomes.RDS')
outcomes.table <- outcomes %>% 
  ungroup() %>% 
  rename(DV=dv,
         Statistic = statistic,
         Estimate = estimate,
         Term = term,
         `_p_` = p.value,
         `Sum Sq.` = sumsq,
         `Mean Sq.` = meansq,
         `95% lower` = conf.low,
         `95% upper` = conf.high,
         Month=month
         ) %>% 
  select(DV, Type, Term, Month, Estimate, SD, contains('conf'), contains("Sq"),
                  contains("df"), Statistic, `_p_`) 

outcomes.table %>% 
  mutate(DV = toupper(DV)) %>%
  DT::datatable(class = 'cell-border stripe', 
                options=list(pageLength = 15)) %>% 
  # round numeric display columns only
  DT::formatRound(digits=0, columns=c("Month", "df", "NumDF")) %>% 
  DT::formatRound(digits=2, columns=c("SD", "Estimate", "Sum Sq.", "Mean Sq.",
                                      "DenDF", "Statistic", "_p_"))

Additionally, calculate SMDs:

baseline_sd <- rfmain %>% filter(month==0) %>% select(CONT.OUTCOMES) %>% 
  data.table::melt() %>% 
  group_by(variable) %>% 
  summarise(sdbase=sd(value, na.rm=T)) %>% 
  rename(DV=variable)

smdtab <- outcomes.table %>% 
  filter(Type=="Planned contrasts") %>% 
  filter(Month!=0) %>% 
  left_join(baseline_sd, by="DV") %>% 
  mutate(SMD = Estimate/sdbase) %>% 
  select(DV, Month, Estimate, SMD) %>% 
  mutate_at(.vars=vars(Estimate, SMD), funs(round(., 2)))

smdtab %>% write_csv('smd.csv')

Note on degrees of freedom: Our initial analysis plan did not specify which correction method should be used for the denominator degrees of freedom. Satterthwaite is the default in the lmerTest package and was originally used. However, for some outcomes the DDF were implausibly high under this method, and a reviewer queried them. For our final report we switched to the Kenward-Roger method; this did not affect the inferences drawn.
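As an illustration (not part of the original pipeline), the two DDF methods can be compared directly for a single outcome, reusing makemodelformula() and rfmain.z from above:

m <- lmer(makemodelformula("hrsd"), data=rfmain.z)
anova(m, ddf="Satterthwaite")   # lmerTest default, used originally
anova(m, ddf="Kenward-Roger")   # method used for the final report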

Outcome plots

rfmain %>% 
  select(grp, month, hrsd, eac, aaq, mssi, ssq, phq9) %>% 
  melt(id.vars=c("grp", "month")) %>% 
  mutate(month=factor(as.numeric(as.character(month)))) %>% 
  mutate(VARIABLE=toupper(variable)) %>% 
  ggplot(aes(month, value, fill=grp)) + 
  geom_boxplot(outlier.size = .5, position=position_dodge(1)) + 
  facet_wrap(~VARIABLE, scales="free", ncol=2) + 
  ylab("") + xlab("Month") + 
  theme(legend.position = "bottom") + 
  scale_fill_manual("", values = c("darkgrey", "white")) 
Boxplot for each of the primary and secondary outcomes.

ggsave('outcomes-boxplot.png')
# outcomes <- readRDS('outcomes.RDS')
outcomes %>% 
  filter(Type=="Planned contrasts") %>% 
  mutate(month=factor(month)) %>% 
  ggplot(aes(month, estimate, ymin=conf.low, ymax=conf.high, group=1)) + 
  geom_pointrange() + geom_line() +
  facet_wrap(~toupper(dv), scales="free", ncol=2) +
  ylab("Group difference (RO-DBT — TAU) with 95% CI") + 
  xlab("Month") +
  theme(panel.grid.major = element_line( color="lightgrey"),
        panel.grid.minor = element_line(linetype = "dotted", color="lightgrey", size=.25)) 
Group differences at months 0, 7, 12 and 18 with 95% CI

Group differences at months 0, 7, 12 and 18 with 95% CI

ggsave('contrasts-lineplot.png')

Bayesian analysis

A number of reviewers asked us to comment on the preponderance of the evidence from our trial, given the lower than expected power achieved (due to under-recruitment) and the non-significant p values for the HRSD at the 12 and 18 month follow-ups. We were also asked to justify a claim in an earlier draft that RO-DBT was “much better” than TAU at the 7 month follow-up, given the relatively wide confidence interval for this contrast (-9.84 to -0.95).

We were happy to add a Bayesian re-analysis of our between-group contrasts. We had already run suitable models when computing the therapist-related ICC and when simulating predicted outcomes for new patients as part of our NIHR report. These models are parameterised in the same way as the primary analyses, using makemodelformula() above; fitting is done with Stan via the brms package.

Calculating the ICC for therapists could be somewhat sensitive to the choice of priors, so we chose pessimistic (zero-centred), weakly informative priors for the relevant model terms, including the treatment effect. These imply that we viewed treatment effects of abs(D) > 1 as relatively unlikely. For the variance parameters we report results using the brms default student_t prior, which constrains them to be positive but is otherwise relatively broad.
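The exact student_t scales brms chooses are data-dependent; an illustrative way to inspect the defaults for this model before fitting:

# list the priors brms would apply by default to this model,
# including the student_t priors on the sd/sigma terms mentioned above
get_prior(makemodelformula('hrsd'), data=rfmain.z)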

Details of the Bayesian model fitting

m.brm <- brm(makemodelformula('hrsd'), 
             data=rfmain.z, 
             prior = c(set_prior("normal(0, 16)", class="b"), 
                       set_prior("normal(20, 16)", class="Intercept")),
             sample_prior=TRUE, 
             save_all_pars = TRUE,
             iter=ITER,
             chains=CHAINS)
# save MCMC chain/model object for reuse later
saveRDS(m.brm, "m.brm.RDS")

Predict from the model, then subtract the predicted scores by group to estimate the treatment effect:

m.brm <- readRDS("m.brm.RDS")
# use lmer for convenience to get the model frame for predictions
m <- lmer(makemodelformula('hrsd'), data=rfmain.z)
nd <- bind_rows(m@frame %>%  mutate(grp="RO-DBT"), 
                m@frame %>%  mutate(grp="TAU")) %>% 
  mutate(grp=factor(grp))

fits_ <- add_fitted_samples(model = m.brm, newdata = nd)
fits <- fits_ %>% group_by(month, grp, .iteration) %>% 
  summarise(estimate=mean(estimate))

D <- fits %>% dcast(.iteration+month~grp, value.var="estimate") %>% 
  mutate(d=`RO-DBT`-TAU) %>% 
  group_by(month)

Treatment effect estimates

Compare with the models fit by ML; tabulate the treatment effect estimates by month:

D %>% 
  group_by(month) %>% 
  mean_hdi(d) %>% 
  rename(`RO-DBT — TAU`=d, 
         Lower=.lower, 
         Upper=.upper, 
         Month=month) %>% select(-.width, -.point, -.interval) %>% 
  mutate_if(is.numeric, funs(sprintf("%.2f",.))) %>% 
  pander("Treatment effects by month, with 95% credible interval (HPDI), fitted via MCMC.")
Treatment effects by month, with 95% credible interval (HPDI), fitted via MCMC.

Month   RO-DBT — TAU   Lower   Upper
-----   ------------   -----   -----
    0          -0.10   -3.92    3.70
    7          -4.03   -8.07   -0.23
   12          -1.83   -5.69    2.13
   18          -1.67   -5.65    2.23
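For the ML side of this comparison, the corresponding Kenward-Roger contrasts can be regenerated with the same emmeans call used in the primary analysis above; a sketch:

# ML comparison: by-month group contrasts for hrsd, as in the primary analysis
m <- lmer(makemodelformula("hrsd"), data=rfmain.z)
contrast(emmeans(m, specs=~grp|month, lmer.df = "kenward-roger"),
         method="pairwise", adjust="none") %>% confint()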
D %>%
  mutate(Month=factor(month, levels=c(0,7,12,18))) %>% 
  ggplot(aes(y=Month, x=d)) +
  tidybayes::geom_halfeyeh(point_interval = mean_hdi) +
  geom_vline(xintercept = 0, linetype="dotted", color="darkgrey") + 
  xlab("RO-DBT — TAU (HRSD)")
Posterior estimates of mean treatment effect with 95% and 66% highest density intervals.

Evidence ratios for superiority of RO-DBT

Use brms::hypothesis to test the hypothesis that RO-DBT is superior at each follow-up (and at baseline):

# d = RO-DBT - TAU on the HRSD, where lower scores are better, so d < 0
# (or d < -2) favours RO-DBT; columns are relabelled in the favourable direction
bf.directed.hrsd <- 
  D %>% 
  do(., hypothesis(., c("d < 0","d < -2")) %>% .$hypothesis) %>% 
  select(month, Hypothesis, Evid.Ratio) %>% 
  dcast(month~Hypothesis, value.var="Evid.Ratio") %>% 
  setnames(c("month", "ER(d > 0)", "ER(d > 2)")) 

bf.directed.hrsd %>% 
  rename(Month=month) %>% 
  mutate_if(is.numeric, funs(sprintf("%.2f",.))) %>% 
  pander(caption="Posterior evidence ratios for RO-DBT superiority over TAU")
Posterior evidence ratios for RO-DBT superiority over TAU

Month   ER(d > 0)   ER(d > 2)
-----   ---------   ---------
    0        1.09        0.16
    7       43.71        6.68
   12        5.53        0.84
   18        4.62        0.74

Bayesian models for secondary outcomes

Re-run all secondary outcome models with brms:

secondary.stan.models <- tibble(dv = CONT.OUTCOMES[CONT.OUTCOMES!="hrsd"]) %>% 
  group_by(dv) %>% 
  do(., 
     {
      dv <- .$dv # dv <- "eac"
      frm <- makemodelformula(dv)
      m <- brm(makemodelformula(dv), 
               data=rfmain.z, 
               iter=ITER,
               chains=CHAINS,
               # same fixed effect priors as default rstanarm priors
               prior=c(set_prior("normal(0, 10)", class="Intercept"), 
                       set_prior("normal(0, 2.5)", class="b")))
      saveRDS(m, glue("{dv}.brm.RDS"))
      tibble(filename = glue("{dv}.brm.RDS"))
     }
  )
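The filename table read back below appears to have been cached to CSV in an unshown step; a plausible (assumed) version of that step, using write_csv as elsewhere in this document:

secondary.stan.models %>% write_csv('secondary.stan.models.csv')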
secondary.stan.models <- read_csv('secondary.stan.models.csv')
secondary.D <- secondary.stan.models %>% 
  group_by(dv, filename) %>% 
  do(., {
    dv <- .$dv
    m.ml <- lmer(makemodelformula(dv), data=rfmain.z)
    nd <- bind_rows(m.ml@frame %>%  mutate(grp="RO-DBT"), 
                m.ml@frame %>%  mutate(grp="TAU")) %>% 
      mutate(grp=factor(grp))
  
    stanmodel <- readRDS(.$filename)
    fits_ <- add_fitted_samples(model = stanmodel, newdata = nd)
    fits <- fits_ %>% group_by(month, grp, .iteration) %>% 
      summarise(estimate=mean(estimate))
    
    D <- fits %>% dcast(.iteration+month~grp, value.var="estimate") %>% 
      mutate(d=`RO-DBT`-TAU) %>% 
      group_by(month)
    D
  })

saveRDS(secondary.D, "secondary.D.RDS")

Note that we invert the BF for EAC because larger EAC scores indicate positive outcomes, and BF10 is sometimes easier to think about than BF01.

secondary.D <- readRDS("secondary.D.RDS")

reversed.vars <- c("eac")
secondary.bf <- secondary.D %>% 
  group_by(dv, month) %>% 
  do(., {
      hypothesis(., c("d < 0")) %>% 
      .$hypothesis }) %>% 
  mutate(Evid.Ratio = ifelse(dv %in% reversed.vars, 1/Evid.Ratio, Evid.Ratio)) %>% 
  select(dv, month, Hypothesis, Evid.Ratio) %>% 
  rename(`ER(d > 0)` = Evid.Ratio)

all.bf <- 
  bind_rows(bf.directed.hrsd %>% mutate(dv="hrsd"), secondary.bf) %>% 
  select(-Hypothesis)

all.bf %>% 
  select(dv, month, contains('ER')) %>% 
  rename(DV=dv, Month=month) %>% 
  dcast(DV ~ Month, value.var="ER(d > 0)") %>% 
  pander(caption="Bayes factors for the hypothesis that RO-DBT is superior to TAU")
Bayes factors for the hypothesis that RO-DBT is superior to TAU

DV          0        7        12      18
----   ------   ------   -------   -----
aaq    0.7682    30.47     121.8   55.98
eac    0.9965    36.36      3242   278.7
hrsd    1.093    43.71     5.531   4.622
mssi   0.9515    17.58     5.136   1.516
phq9   0.8598    30.82     33.64   12.39
ssq    0.9186   0.1496   0.09623   0.202

Cumulative probability plot

Plot the cumulative posterior probability of effects of varying sizes. The intersection of each coloured line with 0.5 on the y axis gives the posterior median of the treatment effect of RO-DBT.

# For each month, the posterior probability that the benefit of RO-DBT
# (-1 * d, since lower scores are better) exceeds val
pforD <- function(df, val){ 
  df %>% 
    group_by(month) %>% 
    mutate(e = -1*value > val) %>% 
    summarise(p = mean(e), d=val)
}

mdf <- D %>% 
  group_by(month, .iteration) %>% 
  summarise(value=mean(d)) 

ropes.hamd <- tibble(d = seq(0, 8, .05)) %>% 
  rowwise() %>% 
  do(., pforD(mdf, .$d))
ropes.hamd %>% 
  mutate(Month=factor(month, levels=c(0,7,12,18))) %>% 
  ggplot(aes(x=d, y=p, color=Month)) + 
  geom_hline(yintercept = 1, color="darkgrey", size=.25) +
  # geom_vline(xintercept = 2, color="grey", size=.25) +
  geom_line() + 
  xlab("D = (RO-DBT — TAU)") + 
  ylab('Probability D >= X') +
  scale_y_continuous(breaks=c(.5, .75, .9, .95), limits=c(0,1), expand=c(0,0)) +
  scale_x_continuous(breaks=c(0, 1, 2, 4, 6, 8), limits=c(0,8), expand = c(0,0)) +
  theme(panel.grid.major = element_line(linetype = "dotted", color="lightgrey"))
Cumulative probability plot for treatment effects by month. The y axis gives the probability that the treatment effect will match or exceed the value of x.

ggsave('cumulative-probability.png')

Therapist heterogeneity

Therapist-variance estimates via ML are known to be problematic when the number of therapists is small, so we estimated therapist ICCs from the MCMC model fits. As noted above, we used the default variance priors in brms, which are weakly informative and pessimistic (highest probability density at zero).

We tested three hypotheses: i) that therapist variance was non-zero; ii) that therapist variance exceeded the 2.5% level we expected (on which our protocol power calculations were based); and iii) that therapist variance exceeded 10%.

m.brm <- readRDS('m.brm.RDS')
icc.expr <- "(sqrt(sd_therapist__Intercept) / ( sqrt(sd_therapist__Intercept) + sqrt(sd_patient__Intercept) + sigma) )"

iccs <- hypothesis(m.brm, c(`No variability`=glue("{icc.expr} = 0"),
                    `Greater than 2.5% expected`=glue("{icc.expr} > .025"),
                    `Greater than 10%`=glue("{icc.expr}  > .1")), class=NULL) %>% 
  .$hypothesis

therapist.icc <- (iccs$Estimate[1] * 100) %>% round(1)
iccs %>% 
  select(Hypothesis, Evid.Ratio) %>% 
  mutate_if(is.numeric, funs(sprintf("%.2f", .))) %>% 
  pander(justify=c("left", "centre"), caption=glue::glue("Tests that the therapist ICC ({therapist.icc}%) was greater than 0, 2.5% or 10%."))
Tests that the therapist ICC (14.2%) was greater than 0, 2.5% or 10%.

Hypothesis                   Evid.Ratio
--------------------------   ----------
No variability                     0.05
Greater than 2.5% expected       216.39
Greater than 10%                   8.38

To check that the priors were not unduly influencing results (e.g. inflating the ICC by placing prior mass on values > 0), we re-ran the model with the scale of the variance priors halved and doubled; the results were not materially different.
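A minimal sketch of that sensitivity check, assuming the default sd prior here was student_t(3, 0, 10) (the actual scale is data-dependent; see get_prior() above):

# Sensitivity sketch: refit with the variance-prior scale halved and doubled.
# ASSUMPTION: the brms default for the sd terms was student_t(3, 0, 10).
fixed.priors <- c(set_prior("normal(0, 16)", class="b"), 
                  set_prior("normal(20, 16)", class="Intercept"))
m.brm.halfsd <- brm(makemodelformula('hrsd'), data=rfmain.z,
                    prior=c(fixed.priors, set_prior("student_t(3, 0, 5)", class="sd")),
                    iter=ITER, chains=CHAINS)
m.brm.doublesd <- brm(makemodelformula('hrsd'), data=rfmain.z,
                      prior=c(fixed.priors, set_prior("student_t(3, 0, 20)", class="sd")),
                      iter=ITER, chains=CHAINS)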

To estimate the difference between the best and worst therapists, we simulated from the main model:

thrpst.effects <- m.brm %>% as.tibble() %>% 
  select(dplyr::matches('r_therapist\\.\\d+\\.Intercept')) %>% 
  melt() %>% 
  group_by(variable) %>% 
  mean_hdi(value, .prob = .9) %>% 
  arrange(value) %>% 
  ungroup() %>% 
  mutate(therapist = factor(row_number()))

thrpst.effects %>% 
  summarise(diff(c(max(value), min(value))))
## # A tibble: 1 x 1
##   `diff(c(max(value), min(value)))`
##                               <dbl>
## 1                             -2.73