library(tidyverse)
library(kableExtra)

load("man_quality.RData")

# Table 1 --------------------------------------------------


summary_with_article_n <- man_quality %>% 
    filter(link_desc == "ganzer Artikel") %>% 
    ungroup() %>% 
    summarise(
        n = n(),
        Interventional = sum(summarized_study_type == "Interventional", na.rm = T),
        start_mean = mean(summarized_start_date, na.rm = T),
        start_sd = sd(summarized_start_date, na.rm = T),
        n_mean = mean(summarized_n, na.rm = T),
        n_sd = sd(summarized_n, na.rm = T),
        `Pre-Registered` = sum(summarized_pre, na.rm = T),
        `KKS` = sum(is_koor, na.rm = T),
        `U of Excellence` = sum(is_exz, na.rm = T),
        `U of Exc. & KKS` = sum(is_exz & is_koor, na.rm = T),
        `On CT.gov` = sum(has_nct, na.rm = T),
        `On DRKS` = sum(has_drks, na.rm = T),
        `On EUCTR` = sum(has_euctr, na.rm = T)
    ) %>% 
    dplyr::select(-start_mean, -start_sd, -n_mean, -n_sd)

summary_with_article_perc <- man_quality %>% 
    filter(link_desc == "ganzer Artikel") %>% 
    ungroup() %>% 
    summarise(
        n = n() / nrow(man_quality), 
        Interventional = mean(summarized_study_type == "Interventional", na.rm = T),
        `Pre-Registered` = mean(summarized_pre, na.rm = T),
        `KKS` = mean(is_koor, na.rm = T),
        `U of Excellence` = mean(is_exz, na.rm = T),
        `U of Exc. & KKS` = mean(is_exz & is_koor, na.rm = T),
        `On CT.gov` = mean(has_nct, na.rm = T),
        `On DRKS` = mean(has_drks, na.rm = T),
        `On EUCTR` = mean(has_euctr, na.rm = T)
    ) 


summary_without_article_n <- man_quality %>% 
    filter(link_desc != "ganzer Artikel") %>% 
    ungroup() %>% 
    summarise(
        n = n(),
        Interventional = sum(summarized_study_type == "Interventional", na.rm = T),
        start_mean = mean(summarized_start_date, na.rm = T),
        start_sd = sd(summarized_start_date, na.rm = T),
        n_mean = mean(summarized_n, na.rm = T),
        n_sd = sd(summarized_n, na.rm = T),
        `Pre-Registered` = sum(summarized_pre, na.rm = T),
        `KKS` = sum(is_koor, na.rm = T),
        `U of Excellence` = sum(is_exz, na.rm = T),
        `U of Exc. & KKS` = sum(is_exz & is_koor, na.rm = T),
        `On CT.gov` = sum(has_nct, na.rm = T),
        `On DRKS` = sum(has_drks, na.rm = T),
        `On EUCTR` = sum(has_euctr, na.rm = T)
    ) |> 
    dplyr::select(-start_mean, -start_sd, -n_mean, -n_sd)

summary_without_article_perc <- man_quality %>% 
    filter(link_desc != "ganzer Artikel") %>% 
    ungroup() %>% 
    summarise(
        n = n() / nrow(man_quality), 
        Interventional = mean(summarized_study_type == "Interventional", na.rm = T),
        `Pre-Registered` = mean(summarized_pre, na.rm = T),
        `KKS` = mean(is_koor, na.rm = T),
        `U of Excellence` = mean(is_exz, na.rm = T),
        `U of Exc. & KKS` = mean(is_exz & is_koor, na.rm = T),
        `On CT.gov` = mean(has_nct, na.rm = T),
        `On DRKS` = mean(has_drks, na.rm = T),
        `On EUCTR` = mean(has_euctr, na.rm = T)
    ) 

summary_man_quality_n <- man_quality %>% 
    ungroup() %>% 
    summarise(
        n = n(),
        Interventional = sum(summarized_study_type == "Interventional", na.rm = T),
        start_mean = mean(summarized_start_date, na.rm = T),
        start_sd = sd(summarized_start_date, na.rm = T),
        n_mean = mean(summarized_n, na.rm = T),
        n_sd = sd(summarized_n, na.rm = T),
        `Pre-Registered` = sum(summarized_pre, na.rm = T),
        `KKS` = sum(is_koor, na.rm = T),
        `U of Excellence` = sum(is_exz, na.rm = T),
        `U of Exc. & KKS` = sum(is_exz & is_koor, na.rm = T),
        `On CT.gov` = sum(has_nct, na.rm = T),
        `On DRKS` = sum(has_drks, na.rm = T),
        `On EUCTR` = sum(has_euctr, na.rm = T)
    ) %>% 
    dplyr::select(-start_mean, -start_sd, -n_mean, -n_sd)

summary_man_quality_perc <- man_quality %>% 
    ungroup() %>% 
    summarise(
        n = 1,
        Interventional = mean(summarized_study_type == "Interventional", na.rm = T),
        `Pre-Registered` = mean(summarized_pre, na.rm = T),
        `KKS` = mean(is_koor, na.rm = T),
        `U of Excellence` = mean(is_exz, na.rm = T),
        `U of Exc. & KKS` = mean(is_exz & is_koor, na.rm = T),
        `On CT.gov` = mean(has_nct, na.rm = T),
        `On DRKS` = mean(has_drks, na.rm = T),
        `On EUCTR` = mean(has_euctr, na.rm = T)
    ) 

summary_with_article_n <- summary_with_article_n %>%
    mutate_if(is.numeric, ~round(., 1)) %>%
    mutate_all(as.character) %>%
    pivot_longer(everything())

summary_with_article_perc <- summary_with_article_perc %>%
    mutate_if(is.numeric, ~round(. * 100, 1)) %>%
    mutate_all(as.character) %>%
    pivot_longer(everything())

summary_without_article_n <- summary_without_article_n %>%
    mutate_if(is.numeric, ~round(., 1)) %>%
    mutate_all(as.character) %>%
    pivot_longer(everything())

summary_without_article_perc <- summary_without_article_perc %>%
    mutate_if(is.numeric, ~round(. * 100, 1)) %>%
    mutate_all(as.character) %>%
    pivot_longer(everything())

summary_man_quality_n <- summary_man_quality_n %>% 
    mutate_if(is.numeric, ~round(., 1)) %>% 
    mutate_all(as.character) %>% 
    pivot_longer(everything())

summary_man_quality_perc <- summary_man_quality_perc %>% 
    mutate_if(is.numeric, ~round(. * 100, 1)) %>% 
    mutate_all(as.character) %>% 
    pivot_longer(everything())

testtab <- table(ifelse(man_quality$link_desc != "ganzer Artikel", "no article", "article"),
                 ifelse(man_quality$has_drks, "On DRKS", "Not on DRKS"))
drks_test <- prop.test(testtab)
drks_chivalue <- paste0(sprintf('%.2f', drks_test$statistic), " (", round(drks_test$parameter, 2), ")")
drks_prop_diff <- -round(diff(drks_test$estimate), 2)
drks_ci <- paste0(drks_prop_diff, " [", round(drks_test$conf.int[1], 2), ", ", round(drks_test$conf.int[2], 2), "]")
drks_tvalue <- ""
drks_pvalue <- sprintf('%.2f', drks_test$p.value)
testtab <- table(man_quality$link_desc != "ganzer Artikel", man_quality$has_euctr)
euctr_test <- prop.test(testtab)
euctr_chivalue <- paste0(sprintf('%.2f', euctr_test$statistic), " (", round(euctr_test$parameter, 2), ")")
euctr_prop_diff <- -round(diff(euctr_test$estimate), 2)
euctr_ci <- paste0(euctr_prop_diff, " [", round(euctr_test$conf.int[1], 2), ", ", round(euctr_test$conf.int[2], 2), "]")
euctr_tvalue <- ""
euctr_pvalue <- sprintf('%.2f', euctr_test$p.value)
testtab <- table(man_quality$link_desc != "ganzer Artikel", man_quality$has_nct)
nct_test <- prop.test(testtab)
nct_chivalue <- paste0(sprintf('%.2f', nct_test$statistic), " (", round(nct_test$parameter, 2), ")")
nct_prop_diff <- -round(diff(nct_test$estimate), 2)
nct_ci <- paste0(nct_prop_diff, " [", round(nct_test$conf.int[1], 2), ", ", round(nct_test$conf.int[2], 2), "]")
nct_tvalue <- ""
nct_pvalue <- sprintf('%.2f', nct_test$p.value)
testtab <- table(man_quality$link_desc != "ganzer Artikel", man_quality$is_exz)
exz_test <- prop.test(testtab)
exz_chivalue <- paste0(sprintf('%.2f', exz_test$statistic), " (", round(exz_test$parameter, 2), ")")
exz_prop_diff <- -round(diff(exz_test$estimate), 2)
exz_ci <- paste0(exz_prop_diff, " [", round(exz_test$conf.int[1], 2), ", ", round(exz_test$conf.int[2], 2), "]")
exz_tvalue <- ""
exz_pvalue <- sprintf('%.2f', exz_test$p.value)
testtab <- table(man_quality$link_desc != "ganzer Artikel", man_quality$is_koor)
koor_test <- prop.test(testtab)
koor_chivalue <- paste0(sprintf('%.2f', koor_test$statistic), " (", round(koor_test$parameter, 2), ")")
koor_prop_diff <- -round(diff(koor_test$estimate), 2)
koor_ci <- paste0(koor_prop_diff, " [", round(koor_test$conf.int[1], 2), ", ", round(koor_test$conf.int[2], 2), "]") 
koor_tvalue <- ""
koor_pvalue <- sprintf('%.2f', koor_test$p.value)
testtab <- table(man_quality$link_desc != "ganzer Artikel", man_quality$is_koor & man_quality$is_exz)
exzkoor_test <- prop.test(testtab)
exzkoor_chivalue <- paste0(sprintf('%.2f', exzkoor_test$statistic), " (", round(exzkoor_test$parameter, 2), ")")
exzkoor_prop_diff <- -round(diff(exzkoor_test$estimate), 2)
exzkoor_ci <- paste0(exzkoor_prop_diff, " [", round(exzkoor_test$conf.int[1], 2), ", ", round(exzkoor_test$conf.int[2], 2), "]")
exzkoor_tvalue <- ""
exzkoor_pvalue <- sprintf('%.2f', exzkoor_test$p.value)
testtab <- table(man_quality$link_desc != "ganzer Artikel", man_quality$summarized_pre)
pre_test <- prop.test(testtab)
pre_chivalue <- paste0(sprintf('%.2f', pre_test$statistic), " (", round(pre_test$parameter, 2), ")")
pre_prop_diff <- -round(diff(pre_test$estimate), 2)
pre_ci <- paste0(pre_prop_diff, " [", round(pre_test$conf.int[1], 2), ", ", round(pre_test$conf.int[2], 2), "]")
pre_tvalue <- ""
pre_pvalue <- sprintf('%.2f', pre_test$p.value)
if (pre_pvalue == "0.00") pre_pvalue <- "<0.01"
testtab <- table(man_quality$link_desc != "ganzer Artikel",
                 man_quality$summarized_study_type == "Interventional")
interv_test <- prop.test(testtab)
interv_chivalue <- paste0(sprintf('%.2f', interv_test$statistic), " (", round(interv_test$parameter, 2), ")")
interv_prop_diff <- -round(diff(interv_test$estimate), 2)
interv_ci <- paste0(interv_prop_diff, " [", round(interv_test$conf.int[1], 2), ", ", round(interv_test$conf.int[2], 2), "]")
interv_tvalue <- ""
interv_pvalue <- sprintf('%.2f', interv_test$p.value)
testtab <- table(man_quality$link_desc != "ganzer Artikel",
                 man_quality$summarized_study_type %in% c("Observational",
                                                          "Observational [Patient Registry]",
                                                          "Non-interventional"))
obs_test <- prop.test(testtab)
obs_chivalue <- paste0(sprintf('%.2f', obs_test$statistic), " (", round(obs_test$parameter, 2), ")")
obs_prop_diff <- -round(diff(obs_test$estimate), 2)
obs_ci <- paste0(obs_prop_diff, " [", round(obs_test$conf.int[1], 2), ", ", round(obs_test$conf.int[2], 2), "]")
obs_tvalue <- ""
obs_pvalue <- sprintf('%.2f', obs_test$p.value)

tvalues <- tribble(~name,                            ~tvalue,
                   "Overall",                              as.character(""),
                   "Interventional",                 as.character(interv_tvalue),
                   "Pre-Registered",                 as.character(pre_tvalue),
                   "KKS",                            as.character(koor_tvalue),
                   "U of Excellence",                as.character(exz_tvalue),
                   "U of Exc. & KKS",                as.character(exzkoor_tvalue),
                   "On CT.gov",                      as.character(nct_tvalue),
                   "On DRKS",                        as.character(drks_tvalue),
                   "On EUCTR",                       as.character(euctr_tvalue))

chivalues <- tribble(~name,                            ~chivalue,
                     "Overall",                              as.character(""),
                     "Interventional",                 as.character(interv_chivalue),
                     "Pre-Registered",                 as.character(pre_chivalue),
                     "KKS",                            as.character(koor_chivalue),
                     "U of Excellence",                as.character(exz_chivalue),
                     "U of Exc. & KKS",                as.character(exzkoor_chivalue),
                     "On CT.gov",                      as.character(nct_chivalue),
                     "On DRKS",                        as.character(drks_chivalue),
                     "On EUCTR",                       as.character(euctr_chivalue))

cis <- tribble(~name,                            ~chivalue,
               "Overall",                              as.character(""),
               "Interventional",                 as.character(interv_ci),
               "Pre-Registered",                 as.character(pre_ci),
               "KKS",                            as.character(koor_ci),
               "U of Excellence",                as.character(exz_ci),
               "U of Exc. & KKS",                as.character(exzkoor_ci),
               "On CT.gov",                      as.character(nct_ci),
               "On DRKS",                        as.character(drks_ci),
               "On EUCTR",                       as.character(euctr_ci))

pvalues <- tribble(~name,                            ~pvalue,
                   "Overall",                              as.character(""),
                   "Interventional",                 as.character(interv_pvalue),
                   "Pre-Registered",                 as.character(pre_pvalue),
                   "KKS",                            as.character(koor_pvalue),
                   "U of Excellence",                as.character(exz_pvalue),
                   "U of Exc. & KKS",                as.character(exzkoor_pvalue),
                   "On CT.gov",                      as.character(nct_pvalue),
                   "On DRKS",                        as.character(drks_pvalue),
                   "On EUCTR",                       as.character(euctr_pvalue))

tab <- summary_man_quality_n %>% 
    left_join(summary_man_quality_perc,
              by = "name") %>% 
    left_join(summary_without_article_n,
              by = "name") %>% 
    left_join(summary_without_article_perc,
              by = "name") %>% 
    left_join(summary_with_article_n,
              by = "name") %>% 
    left_join(summary_with_article_perc,
              by = "name") %>% 
    left_join(chivalues, by = "name") %>% 
    left_join(cis, by = "name") %>% 
    left_join(pvalues, by = "name") 


tab <- tab %>% 
    mutate(across(everything(), function(x) ifelse(is.na(x), "", x)))

tab <- data.frame(tab, stringsAsFactors = F)
rownames(tab) <- tab$name
tab$name <- NULL
colnames(tab) <- c("n", "%", "n", "%", "n", "%", "$\\chi^2$ (df)", "d [95% CI]", "p-Value")

kbl(tab,
    booktabs = T,
    longtable = F,
    # format = "latex",
    escape = T,
    linesep = "",
    caption = "General descriptive information of the sample and the subset of studies for which published articles were found. P-values and 95% confidence intervals are from $\\chi^2$ tests for proportions. Confidence intervals represent differences in proportions. KKS: Coordinating Center for Clinical Studies. U of Exc.: University of Excellence. CT.gov: ClinicalTrials.gov. DRKS: German Clinical Trials Register. EUCTR: EU Clinical Trials Register.") %>%
    kable_styling(full_width = T, font_size = 8) %>%
    column_spec(1, width_min = "2in") %>%
    column_spec(9, width_min = "1in") %>%
    column_spec(10, width_min = "1in") %>%
    add_header_above(c(" " = 1,
                       "Sample" = 2,
                       "No Article" = 2,
                       "With Article" = 2,
                       " " = 3))



# Table Adherence Ratings -------------------------------------------------


a_categories <- c("des_a", "pri_a", "sec_a", "inc_a", "int_a")
a_stats <- map(a_categories, function(x) {
    man_quality %>% 
        count(.data[[x]]) %>% 
        drop_na %>% 
        rename_with(function(...) c("Score", "n_a")) %>% 
        mutate(`%` = round(n_a / sum(n_a) * 100, 1),
               Criterion = x,
               Criterion = gsub("_a", "", Criterion))
}) %>% 
    bind_rows()

qa_tab <- a_stats %>% 
    mutate(
        Criterion = case_when(Criterion == "des" ~ "Study Design",
                              Criterion == "pri" ~ "Primary Outcome",
                              Criterion == "sec" ~ "Secondary Outcome",
                              Criterion == "inc" ~ "Inclusion Criteria",
                              Criterion == "int" ~ "Intervention"
        )
    ) %>% 
    mutate(adherence_err = case_when(Score == 0 & `%` != "" ~ "Yes",
                                     Score == 1 & `%` != "" ~ "No",
                                     TRUE ~ "")) %>% 
    dplyr::select(Criterion, n_a, `%`, adherence_err) %>% 
    pivot_wider(names_from = c(adherence_err), values_from = c(`%`, n_a)) %>% 
    select(Criterion, n_a_Yes, `%_Yes`, n_a_No, `%_No`) |> 
    mutate(n_a_Yes = paste0(n_a_Yes, " / ", n_a_No + n_a_Yes)) |> 
    select(Criterion, n_a_Yes, `%_Yes`)


colnames(qa_tab) <- c("Criterion", "n", "%", "n", "%")

kbl(qa_tab, 
    booktabs = T, 
    linesep = rep("", 10),
    caption = "Adherence Ratings per Category.") %>% 
    kable_styling(full_width = F, font_size = 8) %>% # font size 8
    column_spec(1, width_min = "2in") %>% 
    add_header_above(c(" " = 1, "Adherence Errors" = 2))


# Studies with adherence errors -------------------------------------------

adh_errors <- man_quality %>% 
    select(ends_with("_a"))
apply(adh_errors, 1, function(x) sum(na.omit(x) < 0.5)) %>% 
    table()


# Manual classification of errors -----------------------------------------


adh_errors <- read_csv("adh_problems.csv")
adh_errors <- read_csv("writing_up/adherence_clinical_trials/Revision 1 Peerj/data and code/adh_problems.csv")

adh_errors %>% 
    dplyr::select(ends_with("error_type")) %>% 
    map(table)


# Inclusion more specific / less specific in register than article --------

man_quality %>% 
  filter(link_desc == "ganzer Artikel") %>% 
  count(inc_a_errtype) %>% 
  arrange(desc(n))


# Regression models -------------------------------------------------------


library(glue)

standardize <- function(x) {
    (x - mean(x, na.rm = T)) / (2 * sd(x, na.rm = T))
}

moddat <- man_quality %>% 
    mutate(
        registration_year = as.numeric(year(summarized_first_posted)),
        summarized_n = summarized_n / 1000,
        is_interventional = summarized_study_type == "Interventional") %>% 
    rowwise() %>% 
    mutate(
        mean_adherence = mean(c(des_a, pri_a, sec_a, inc_a, int_a), na.rm = T),
        sum_adherence_errors = sum(c(des_a < 1, pri_a < 1, sec_a < 1, inc_a < 1, int_a < 1), na.rm = T),
        any_adherence_error = mean_adherence < 1
    ) %>% 
    ungroup()

num_f <- function(x) {
    ifelse(x > 99, ">99", sprintf('%.2f', x))
}

run_regressions <- function(dep_var, x, family, tab_cap, expo, my_fmt = NULL) {
    require(sjPlot)
    require(modelsummary)
    form1 <- as.formula(paste0(dep_var, " ~ is_exz"))
    form2 <- as.formula(paste0(dep_var, " ~ is_exz + summarized_n + registration_year + is_interventional + has_nct + has_drks"))
    form3 <- as.formula(paste0(dep_var, " ~ is_koor"))
    form4 <- as.formula(paste0(dep_var, " ~ is_koor + summarized_n +registration_year + is_interventional + has_nct + has_drks"))
    form5 <- as.formula(paste0(dep_var, " ~ is_koor + is_exz + summarized_n + registration_year + is_interventional + has_nct + has_drks"))
    if (family == "gaussian") {
        mod1 <- lm(form1, data = x)
        mod2 <- lm(form2, data = x)     
        mod3 <- lm(form3, data = x)
        mod4 <- lm(form4, data = x)     
        mod5 <- lm(form5, data = x)     
    } else if (family == "binomial") {
        mod1 <- glm(form1, family = family, data = x)
        mod2 <- glm(form2, family = family, data = x)
        mod3 <- glm(form3, family = family, data = x)
        mod4 <- glm(form4, family = family, data = x)
        mod5 <- glm(form5, family = family, data = x)
    }
    
    names(mod1$coefficients) <- c("(Intercept)", "U of Exc.")
    names(mod2$coefficients) <- c("(Intercept)", "U of Exc.", "n (in 1000)", "Registration Year",
                                  "Interventional", "On CT.gov",
                                  "On DRKS")
    names(mod3$coefficients) <- c("(Intercept)", "KKS")
    names(mod4$coefficients) <-  c("(Intercept)", "KKS", "n (in 1000)", "Registration Year",
                                   "Interventional", "On CT.gov",
                                   "On DRKS")
    names(mod5$coefficients) <-  c("(Intercept)", "KKS", "U of Exc.", 
                                   "n (in 1000)",
                                   "Registration Year",
                                   "Interventional", "On CT.gov",
                                   "On DRKS")
    
    
    models <- list(
        mod1, mod2, mod3, mod4, mod5
    )
    
    options(modelsummary_latex = 'kableExtra')
    if (family == "gaussian") {
        modelsummary(models, 
                     output = "kableExtra", 
                     stars = F, 
                     fmt = fmt_decimal(digits = 2, pdigits = 3),
                     estimate = "{estimate} ({p.value})",
                     statistic = "[{conf.low}, {conf.high}]", 
                     conf_level = 0.95, 
                     title = tab_cap,
                     gof_omit = "BIC|AIC|RMSE|Log",
                     coef_omit = 1,
                     align = "llllll") |> 
            kable_styling(full_width = T, font_size = 10)
    } else if (family == "binomial") {
        modelsummary(models, 
                     output = "kableExtra",
                     stars = F, 
                     fmt = fmt_significant(2),
                     estimate = "{estimate} ({p.value})",
                     statistic = "[{conf.low}, {conf.high}]", 
                     conf_level = 0.95, 
                     title = tab_cap, 
                     gof_omit = "BIC|AIC|RMSE|Log|p",
                     coef_omit = 1,
                     exponentiate = expo,
                     align = "llllll") |> 
            kable_styling(full_width = T, font_size = 10) |> 
            row_spec(1, extra_latex_after = "\\vspace{0.2cm}") |> 
            row_spec(3, extra_latex_after = "\\vspace{0.2cm}") |> 
            row_spec(5, extra_latex_after = "\\vspace{0.2cm}") |> 
            row_spec(7, extra_latex_after = "\\vspace{0.2cm}") |> 
            row_spec(9, extra_latex_after = "\\vspace{0.2cm}") |> 
            row_spec(11, extra_latex_after = "\\vspace{0.2cm}") 
    }
}

run_regressions(
    "any_adherence_error",
    family = "binomial", 
    tab_cap = "Summary of Logistic Regression Models for the Occurence of Adherence Errors with p-values in Parentheses and 95% Confidence Intervals. All Estimates Show Odds Ratios. CT.gov: ClinicalTrials.gov. DRKS: German Clinical Trials Register. EUCTR: EU Clinical Trials Register. KKS: Coordinating Center for Clinical Studies. n: Sample Size. U of Exc.: University of Excellence.",
    x = moddat,
    expo = T
)