################################################################################
#                                                                              #
#     Simulation and comparison of three models for time-to-event analysis     #
#                                                                              #
################################################################################

##------------------------------------------------------------------------------
##
## General Setup 
##
##------------------------------------------------------------------------------

# Set the working directory accordingly: replace "Path" with the project
# folder. While the placeholder (or any non-existing folder) is still in place
# the current working directory is kept and a warning is raised instead of
# aborting the whole script.
work_dir <- "Path"
if (dir.exists(work_dir)) {
  setwd(work_dir)
} else {
  warning("Working directory '", work_dir, "' does not exist; keeping '",
          getwd(), "'", call. = FALSE)
}

# clear environment (this also removes the helper variable 'work_dir')
rm(list = ls(all.names = TRUE))

# load published packages 
library("survival")
library("dplyr")
library("tidyr")
library("addhazard")
library("matrixcalc") 
library("numDeriv")
library("optimx")

##------------------------------------------------------------------------------
##
## Introduction to overall simulation function "SimAddHaz()"
##
##------------------------------------------------------------------------------

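# The subsequent simulation generates data under one of two parametric models
# and compares three models for time-to-event analysis on these data:
# Weibull Additive Hazard Model (WBAH)
# Log-Logistic Additive Hazard Model (LLAH)
# Semi-parametric Additive Hazard Model by Lin & Ying (YL)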

# Doing so, the self-defined function SimAddHaz() is structured as follows:
# 1) artificially create data for each model of interest
# 2) censoring the survival times 
# 3) defining function for maximum likelihood estimation 
# 4) defining function to extract relevant estimation results 
# 5) executing the estimation for WBAH and LLAH
# 6) compute bias, MSE, coverage and convergence
# 7) applying semi-parametric additive hazard model by Lin & Ying 
# 8) returning and visualizing estimation results 

# Necessary input parameters for SimAddHaz() (with no default provided) are: 
# TrueModel = string, setting the data-generating model: either
#             "Weibull Additive Hazard Model" or
#             "Log-logistic Additive Hazard Model"
# nSimDataSets = number of data sets to be simulated
# nPatDataSets = number of assumed study participants to be simulated
# beta = number, true effect of binary covariate
# a_WB = number, true parameter of Weibull distribution
# b_WB = number, true parameter of Weibull distribution
# a_LL = number, true parameter of Log-logistic distribution
# b_LL = number, true parameter of Log-logistic distribution
# events = number in [0, 1], probability that an observation is an event 
#          (0.6/ 0.8 i.e., 60/ 80% events and 40/ 20% censoring)
# seed = number, setting seed to create reproducible random results

##------------------------------------------------------------------------------
##
## Define and load simulation function "SimAddHaz()"
##
##------------------------------------------------------------------------------

# SimAddHaz(): simulate time-to-event data under a parametric additive hazard
# model h(t | x) = h0(t) + beta * x with one binary covariate, fit a Weibull
# and a Log-logistic additive hazard model by maximum likelihood plus the
# semi-parametric model of Lin & Ying (addhazard::ah) to every simulated data
# set, and summarise bias, MSE, coverage and convergence for beta.
#
# Arguments:
#   TrueModel     string, "Weibull Additive Hazard Model" or
#                 "Log-logistic Additive Hazard Model" (data-generating model)
#   nSimDataSets  number of data sets to simulate
#   nPatDataSets  number of observations per data set
#   beta          true additive effect of the binary covariate
#   a_WB, b_WB    Weibull baseline: shape (a_WB) and scale (b_WB)
#   a_LL, b_LL    Log-logistic baseline: scale (a_LL) and shape (b_LL)
#   events        probability that an observation is an event (1 - censoring)
#   seed          seed for the random number generator
#
# Returns an unnamed list of five data frames:
#   [[1]] one-row summary (ID, mean/median bias and MSE, mean coverage and
#         convergence for the WB, LL and YL fits)
#   [[2]] - [[5]] per-data-set bias, MSE, coverage and convergence of beta
#
# Requires the packages survival (Surv), addhazard (ah) and numDeriv (hessian)
# to be attached.
SimAddHaz <- function(TrueModel, nSimDataSets, nPatDataSets, beta, a_WB, b_WB,
                      a_LL, b_LL, events, seed) {

  model_wb <- "Weibull Additive Hazard Model"
  model_ll <- "Log-logistic Additive Hazard Model"

  # fail early with a clear message instead of "object 'Daten_0' not found"
  if (!is.character(TrueModel) || length(TrueModel) != 1L ||
      !(TrueModel %in% c(model_wb, model_ll))) {
    stop("TrueModel must be '", model_wb, "' or '", model_ll, "'",
         call. = FALSE)
  }
  stopifnot(nSimDataSets >= 1, nPatDataSets >= 1)

  set.seed(seed)

  ##----------------------------------------------------------------------------
  ##
  ## 1) Data generation
  ##
  ##----------------------------------------------------------------------------

  # NOTE: the order of the random draws below (covariate, uniforms for X = 1,
  # baseline draws for X = 0, event indicator, censoring fractions) determines
  # the simulated data for a given seed and must not be changed.

  n_total <- nSimDataSets * nPatDataSets
  nSimulatedDataSets <- rep(seq_len(nSimDataSets), each = nPatDataSets)
  PatID <- seq_len(nPatDataSets)
  covar <- rbinom(n = n_total, size = 1, prob = 0.5)
  Daten_0 <- data.frame(nSimulatedDataSets, PatID, covar, TrueModel)
  Daten_0$beta <- beta

  # discretizing points for the distribution function F (the finer the better)
  zeta_ <- 0.1 * (0:700)

  # discretized distribution function F for covariate X = 1, closed with
  # F(\infty) = 1
  if (TrueModel == model_wb) {
    Daten_0$a <- a_WB
    Daten_0$b <- b_WB
    Fdis <- 1 - exp(-(zeta_ / b_WB)^a_WB - zeta_ * beta)
    Fdis <- c(Fdis, 1)
  } else {
    Fdis <- 1 - (((zeta_ / a_LL)^b_LL + 1)^(-1)) / exp(zeta_ * beta)
    Fdis <- c(head(Fdis, -2), 1)
  }

  # inverse sampling: locate the first grid point whose F value reaches the
  # uniform draw and interpolate linearly between this and the previous point
  invert_cdf <- function(u, grid, cdf) {
    vapply(u, function(this_u) {
      pos <- which.max(this_u <= cdf)
      inv_slope <- (grid[pos] - grid[pos - 1]) / (cdf[pos] - cdf[pos - 1])
      grid[pos - 1] + (this_u - cdf[pos - 1]) * inv_slope
    }, numeric(1))
  }

  # a uniform draw is consumed for every observation; the resulting times are
  # only used for X = 1
  uu <- runif(n = n_total, min = 0, max = 1)
  surv <- invert_cdf(uu, zeta_, Fdis)

  # X = 0: survival times directly from the baseline distribution
  unexposed <- covar == 0
  if (TrueModel == model_wb) {
    surv[unexposed] <- rweibull(sum(unexposed), shape = a_WB, scale = b_WB)
  } else {
    # inversion method (inverse cumulative distribution function)
    uu_zv <- runif(n = sum(unexposed), min = 0, max = 1)
    surv[unexposed] <- a_LL * (uu_zv / (1 - uu_zv))^(1 / b_LL)
  }
  Daten_0$surv <- surv

  ##----------------------------------------------------------------------------
  ##
  ## 2) Censoring the survival times
  ##
  ##----------------------------------------------------------------------------

  # For non-informative censoring, the original time is multiplied by a
  # uniformly distributed random number. The censored survival time is then
  # uniformly distributed between 0 and the original survival time.
  Daten_0$event <- rbinom(n = n_total, size = 1, prob = events)
  Daten_0$cens <- as.numeric(Daten_0$event == 0)
  censored <- Daten_0$cens == 1
  Daten_0$surv[censored] <- Daten_0$surv[censored] *
    runif(n = sum(censored), min = 0, max = 1)

  ##----------------------------------------------------------------------------
  ##
  ## 3) Define log-likelihood function for optimization
  ##
  ##----------------------------------------------------------------------------

  # Negative log-likelihood of the additive hazard model.
  #   parms  named vector with elements start_beta, log_h0_a, log_h0_b
  #   data   data frame with columns covar, surv and cens
  #   dist   assumed baseline distribution, "Weibull" or "LogLogistic"
  ll <- function(parms, data, dist) {

    b_covar <- as.numeric(parms["start_beta"]) # unknown regression coefficient

    # baseline parameters are optimized on the log scale:
    # Weibull:      h0_a = shape, h0_b = scale
    # Log-logistic: h0_a = scale, h0_b = shape
    h0_a <- exp(as.numeric(parms["log_h0_a"]))
    h0_b <- exp(as.numeric(parms["log_h0_b"]))

    covar <- as.numeric(data$covar) # covariate vector
    surv <- as.numeric(data$surv)   # survival/ observation time
    cens <- as.numeric(data$cens)   # censoring indicator (1 = censored)

    linp <- b_covar * covar

    # baseline density and survival function
    if (dist == "Weibull") {
      h0_survival <- exp(-(surv / h0_b)^h0_a)
      h0_density <- (h0_a / h0_b) * (surv / h0_b)^(h0_a - 1) *
        exp(-(surv / h0_b)^h0_a)
    } else if (dist == "LogLogistic") {
      h0_density <- ((h0_b / h0_a) * (surv / h0_a)^(h0_b - 1)) /
        ((1 + (surv / h0_a)^h0_b)^2)
      h0_survival <- 1 - (1 / (1 + (surv / h0_a)^(-h0_b)))
    } else {
      stop("unknown baseline distribution: ", dist, call. = FALSE)
    }

    # prevent infinite values of the logarithm by setting a lower bound
    h0_density <- replace(h0_density, h0_density < 0.0000001, 0.0000001)
    h0_survival <- replace(h0_survival, h0_survival < 0.0000001, 0.0000001)

    # events contribute the log density, censored observations the log
    # survival function of the additive hazard model
    loglik <- (1 - cens) * (log(h0_density + linp * h0_survival) - surv * linp) +
      cens * (log(h0_survival) - surv * linp)

    -sum(loglik, na.rm = TRUE)
  }

  ##----------------------------------------------------------------------------
  ##
  ## 4) Define functions to extract relevant estimation results
  ##
  ##----------------------------------------------------------------------------

  # Estimates, standard errors and confidence intervals of one optim() fit.
  #   est         output of optim()
  #   data, dist  data set and baseline distribution used for the fit
  #   conf.level  confidence level of the intervals
  #   df          degrees of freedom of the t distribution for the intervals
  # Returns a data frame with rows par, SE, CI_lb, CI_ub and columns beta,
  # log_h0_a, log_h0_b. SEs and CIs are NA if the hessian cannot be inverted.
  results <- function(est, data, dist, conf.level = 0.95, df) {

    par_names <- c("start_beta", "log_h0_a", "log_h0_b")
    estimates <- as.numeric(est$par[1:3])

    # numerical hessian of the negative log-likelihood at the optimum
    hes <- hessian(ll, est$par[1:3], data = data, dist = dist)

    # compare the reciprocal condition number of the hessian to machine
    # epsilon to make sure the matrix is solvable; any failure leads to NA
    # standard errors and CIs so that the simulation continues
    rc <- tryCatch(rcond(hes), error = function(e) NA_real_)
    invFisher <- NULL
    if (!is.na(rc) && abs(rc) > .Machine$double.eps) {
      invFisher <- tryCatch(solve(hes), error = function(e) NULL)
    }

    if (is.null(invFisher)) {
      SEs <- rep(NA_real_, 3)
    } else {
      # negative diagonal entries yield NaN, i.e. missing SEs
      SEs <- suppressWarnings(sqrt(diag(invFisher)))
    }
    names(SEs) <- par_names

    crit <- qt(1 - (1 - conf.level) / 2, df = df)

    out <- data.frame(rbind(
      par = estimates,
      SE = SEs,
      CI_lb = estimates - crit * SEs,
      CI_ub = estimates + crit * SEs))
    names(out) <- c("beta", "log_h0_a", "log_h0_b")
    out
  }

  # Coverage indicator: 1/0 if the CI is available, NA otherwise.
  coverage_of <- function(true_value, lower, upper) {
    if (is.na(lower) && is.na(upper)) {
      NA_real_
    } else {
      as.numeric(true_value > lower && true_value < upper)
    }
  }

  # Estimate, CI, bias, coverage, SE and MSE of one parameter.
  summarise_par <- function(res, column, true_value) {
    estimate <- res["par", column]
    lower <- res["CI_lb", column]
    upper <- res["CI_ub", column]
    se <- res["SE", column]
    list(estimate = estimate,
         LowerCI = lower,
         UpperCI = upper,
         Bias = estimate - true_value,
         PCov = coverage_of(true_value, lower, upper),
         SE = se,
         MSE = se^2 + (estimate - true_value)^2)
  }

  # All result columns of one parametric fit; names end in "_<suffix>".
  # The starting values of the baseline parameters serve as reference ("true")
  # values for log_h0_a and log_h0_b.
  summarise_fit <- function(res, fit, initial, suffix) {
    with_prefix <- function(x, prefix) {
      setNames(x, paste0(prefix, "_", names(x)))
    }
    loga_true <- unname(initial["log_h0_a"])
    logb_true <- unname(initial["log_h0_b"])
    out <- c(
      with_prefix(summarise_par(res, "beta", beta), "Beta"),
      with_prefix(c(list(true = loga_true),
                    summarise_par(res, "log_h0_a", loga_true)), "loga"),
      with_prefix(c(list(true = logb_true),
                    summarise_par(res, "log_h0_b", logb_true)), "logb"),
      list(convergence = as.numeric(fit$convergence == 0)))
    setNames(out, paste0(names(out), "_", suffix))
  }

  # Semi-parametric additive hazard model by Lin & Ying for one data set.
  # A failing fit is reported by a warning and recorded as NA estimates with
  # convergence_YL = 0 (instead of silently re-using an earlier fit).
  fit_lin_ying <- function(dat, data_set) {
    event <- as.numeric(dat$event)
    surv <- as.numeric(dat$surv)
    covar <- as.numeric(dat$covar)
    survObj <- Surv(surv, event)

    coefs <- tryCatch({
      model <- ah(survObj ~ covar, data = dat, ties = FALSE, robust = FALSE)
      summary(model)$coefficients
    }, error = function(e) {
      warning("Lin & Ying model failed for data set ", data_set, ": ",
              conditionMessage(e), call. = FALSE)
      rep(NA_real_, 4)
    })

    # first four entries of the coefficient table: estimate, SE, lower and
    # upper confidence limit
    estimate <- coefs[1]
    se <- coefs[2]
    lower <- coefs[3]
    upper <- coefs[4]

    list(Beta_estimate_YL = estimate,
         Beta_Bias_YL = estimate - beta,
         Beta_SE_YL = se,
         Beta_MSE_YL = se^2 + (estimate - beta)^2,
         Beta_LowerCI_YL = lower,
         Beta_UpperCI_YL = upper,
         Beta_PCov_YL = coverage_of(beta, lower, upper),
         convergence_YL = as.numeric(!is.na(estimate)))
  }

  ##----------------------------------------------------------------------------
  ##
  ## 5) - 7) Estimation, performance measures and Lin & Ying model
  ##
  ##----------------------------------------------------------------------------

  # starting values (true parameters of the respective baseline distribution)
  start_values <- list(
    Weibull = c(start_beta = beta,
                log_h0_a = log(a_WB),
                log_h0_b = log(b_WB)),
    LogLogistic = c(start_beta = beta,
                    log_h0_a = log(a_LL),
                    log_h0_b = log(b_LL)))
  suffixes <- c(Weibull = "WB", LogLogistic = "LL")

  # box constraints for L-BFGS-B; beta is limited to beta >= 0
  # NOTE(review): the bounds are selected by the data-generating model, not by
  # the fitted distribution, and they act on the log-scale parameters. The
  # lower bound 0.1 used for Log-logistic data therefore restricts
  # exp(log_h0_a) and exp(log_h0_b) to values >= exp(0.1). Kept unchanged to
  # preserve the simulation results; please confirm that this is intended.
  if (TrueModel == model_wb) {
    lower_bounds <- c(0, -Inf, -Inf)
    upper_bounds <- c(Inf, Inf, Inf)
  } else {
    lower_bounds <- c(0, 0.1, 0.1)
    upper_bounds <- c(100, 100, 100)
  }

  # estimation for every data set (nSimDataSets) separately
  rows <- vector("list", nSimDataSets)
  for (k in seq_len(nSimDataSets)) {
    dat <- Daten_0[Daten_0$nSimulatedDataSets == k, ]

    # parametric fits: WBAH first, then LLAH
    parametric <- list()
    for (distribution in names(start_values)) {
      initial <- start_values[[distribution]]
      fit <- optim(par = initial,
                   fn = ll,
                   data = dat,
                   dist = distribution,
                   lower = lower_bounds,
                   upper = upper_bounds,
                   method = "L-BFGS-B",
                   hessian = FALSE)
      res <- results(est = fit, data = dat, dist = distribution,
                     conf.level = 0.95, df = nrow(dat) - 1)
      parametric <- c(parametric,
                      summarise_fit(res, fit, initial,
                                    suffixes[[distribution]]))
    }

    # one result row per data set
    rows[[k]] <- as.data.frame(c(
      list(DataSet = k,
           TrueModel = TrueModel,
           nPatDataSets = nPatDataSets,
           Events = events,
           beta_true = beta),
      parametric,
      fit_lin_ying(dat, k)))
  }
  Estimates_total <- do.call(rbind, rows)
  row.names(Estimates_total) <- NULL

  ##----------------------------------------------------------------------------
  ##
  ## 8) Return and visualize simulation results
  ##
  ##----------------------------------------------------------------------------

  # 8.1) Summary of the simulation setting -------------------------------------

  # ID of the simulation setting: abbreviation of the true distribution, true
  # beta, number of patients and event probability, separated by "_"
  dis <- if (TrueModel == model_wb) "WBAH" else "LLAH"
  ID <- paste(dis, Estimates_total$beta_true[1],
              Estimates_total$nPatDataSets[1],
              Estimates_total$Events[1], sep = "_")

  # mean and median of bias and MSE, mean coverage and convergence
  # (for WB, LL and Lin & Ying model)
  tmp <- data.frame(
    ID = ID,
    Mean_Bias_WB = mean(Estimates_total$Beta_Bias_WB, na.rm = TRUE),
    Mean_Bias_LL = mean(Estimates_total$Beta_Bias_LL, na.rm = TRUE),
    Mean_Bias_YL = mean(Estimates_total$Beta_Bias_YL, na.rm = TRUE),
    Mean_MSE_WB = mean(Estimates_total$Beta_MSE_WB, na.rm = TRUE),
    Mean_MSE_LL = mean(Estimates_total$Beta_MSE_LL, na.rm = TRUE),
    Mean_MSE_YL = mean(Estimates_total$Beta_MSE_YL, na.rm = TRUE),
    Median_Bias_WB = median(Estimates_total$Beta_Bias_WB, na.rm = TRUE),
    Median_Bias_LL = median(Estimates_total$Beta_Bias_LL, na.rm = TRUE),
    Median_Bias_YL = median(Estimates_total$Beta_Bias_YL, na.rm = TRUE),
    Median_MSE_WB = median(Estimates_total$Beta_MSE_WB, na.rm = TRUE),
    Median_MSE_LL = median(Estimates_total$Beta_MSE_LL, na.rm = TRUE),
    Median_MSE_YL = median(Estimates_total$Beta_MSE_YL, na.rm = TRUE),
    Mean_Coverage_WB = mean(Estimates_total$Beta_PCov_WB, na.rm = TRUE),
    Mean_Coverage_LL = mean(Estimates_total$Beta_PCov_LL, na.rm = TRUE),
    Mean_Coverage_YL = mean(Estimates_total$Beta_PCov_YL, na.rm = TRUE),
    Mean_Converg_WB = mean(Estimates_total$convergence_WB),
    Mean_Converg_LL = mean(Estimates_total$convergence_LL),
    Mean_Converg_YL = mean(Estimates_total$convergence_YL))

  # 8.2) Data to visualize simulation results ----------------------------------

  # save data in data frame to create box plot for estimated bias
  box_bias <- data.frame(Estimates_total$Beta_Bias_WB,
                         Estimates_total$Beta_Bias_LL,
                         Estimates_total$Beta_Bias_YL)

  # save data in data frame to create box plot for estimated MSE
  box_mse <- data.frame(Estimates_total$Beta_MSE_WB,
                        Estimates_total$Beta_MSE_LL,
                        Estimates_total$Beta_MSE_YL)

  # save data in data frame to create box plot for mean coverage
  box_coverage <- data.frame(Estimates_total$Beta_PCov_WB,
                             Estimates_total$Beta_PCov_LL,
                             Estimates_total$Beta_PCov_YL)

  # save data in data frame to create box plot for mean convergence
  box_convergence <- data.frame(Estimates_total$convergence_WB,
                                Estimates_total$convergence_LL,
                                Estimates_total$convergence_YL)

  list(tmp, box_bias, box_mse, box_coverage, box_convergence)
}

##------------------------------------------------------------------------------
##
## Execute settings 
##
##------------------------------------------------------------------------------

# Weibull Additive Hazard Model 
# beta = 0
WBAH106 <- SimAddHaz(TrueModel="Weibull Additive Hazard Model", nSimDataSets=1000, nPatDataSets=200, beta=0, a_WB=0.86, b_WB=1.77, a_LL=1.06, b_LL=1.14, events=0.6, seed=1000)
WBAH108 <- SimAddHaz(TrueModel="Weibull Additive Hazard Model", nSimDataSets=1000, nPatDataSets=200, beta=0, a_WB=0.86, b_WB=1.77, a_LL=1.06, b_LL=1.14, events=0.8, seed=1005)
WBAH506 <- SimAddHaz(TrueModel="Weibull Additive Hazard Model", nSimDataSets=1000, nPatDataSets=50, beta=0, a_WB=0.86, b_WB=1.77, a_LL=1.06, b_LL=1.14, events=0.6, seed=1008)
WBAH508 <- SimAddHaz(TrueModel="Weibull Additive Hazard Model", nSimDataSets=1000, nPatDataSets=50, beta=0, a_WB=0.86, b_WB=1.77, a_LL=1.06, b_LL=1.14, events=0.8, seed=1008)
# beta = 0.8
WBAH186 <- SimAddHaz(TrueModel="Weibull Additive Hazard Model", nSimDataSets=1000, nPatDataSets=200, beta=0.8, a_WB=0.86, b_WB=1.77, a_LL=1.06, b_LL=1.14, events=0.6, seed=2008)
WBAH188 <- SimAddHaz(TrueModel="Weibull Additive Hazard Model", nSimDataSets=1000, nPatDataSets=200, beta=0.8, a_WB=0.86, b_WB=1.77, a_LL=1.06, b_LL=1.14, events=0.8, seed=1009)
WBAH586 <- SimAddHaz(TrueModel="Weibull Additive Hazard Model", nSimDataSets=1000, nPatDataSets=50, beta=0.8, a_WB=0.86, b_WB=1.77, a_LL=1.06, b_LL=1.14, events=0.6, seed=1010)
WBAH588 <- SimAddHaz(TrueModel="Weibull Additive Hazard Model", nSimDataSets=1000, nPatDataSets=50, beta=0.8, a_WB=0.86, b_WB=1.77, a_LL=1.06, b_LL=1.14, events=0.8, seed=1010)
# beta = 1.6
WBAH166 <- SimAddHaz(TrueModel="Weibull Additive Hazard Model", nSimDataSets=1000, nPatDataSets=200, beta=1.6, a_WB=0.86, b_WB=1.77, a_LL=1.06, b_LL=1.14, events=0.6, seed=2022)
WBAH168 <- SimAddHaz(TrueModel="Weibull Additive Hazard Model", nSimDataSets=1000, nPatDataSets=200, beta=1.6, a_WB=0.86, b_WB=1.77, a_LL=1.06, b_LL=1.14, events=0.8, seed=1013)
WBAH566 <- SimAddHaz(TrueModel="Weibull Additive Hazard Model", nSimDataSets=1000, nPatDataSets=50, beta=1.6, a_WB=0.86, b_WB=1.77, a_LL=1.06, b_LL=1.14, events=0.6, seed=3024)
WBAH568 <- SimAddHaz(TrueModel="Weibull Additive Hazard Model", nSimDataSets=1000, nPatDataSets=50, beta=1.6, a_WB=0.86, b_WB=1.77, a_LL=1.06, b_LL=1.14, events=0.8, seed=2015)

# Log-logistic Additive Hazard Model
# Twelve runs: beta (0 / 0.8 / 1.6) x participants (200 / 50) x events
# (0.6 / 0.8), each with 1000 simulated data sets. The distribution parameters
# a_WB, b_WB, a_LL and b_LL are identical in every call.
# NOTE(review): seed 1033 is used for both LLAH106 and LLAH588, and seed 1027
# for both LLAH108 and LLAH568 -- confirm that the repetition is intended.

# beta = 0
LLAH106 <- SimAddHaz(
  TrueModel = "Log-logistic Additive Hazard Model",
  nSimDataSets = 1000, nPatDataSets = 200, beta = 0,
  a_WB = 0.86, b_WB = 1.77, a_LL = 1.06, b_LL = 1.14,
  events = 0.6, seed = 1033
)
LLAH108 <- SimAddHaz(
  TrueModel = "Log-logistic Additive Hazard Model",
  nSimDataSets = 1000, nPatDataSets = 200, beta = 0,
  a_WB = 0.86, b_WB = 1.77, a_LL = 1.06, b_LL = 1.14,
  events = 0.8, seed = 1027
)
LLAH506 <- SimAddHaz(
  TrueModel = "Log-logistic Additive Hazard Model",
  nSimDataSets = 1000, nPatDataSets = 50, beta = 0,
  a_WB = 0.86, b_WB = 1.77, a_LL = 1.06, b_LL = 1.14,
  events = 0.6, seed = 1018
)
LLAH508 <- SimAddHaz(
  TrueModel = "Log-logistic Additive Hazard Model",
  nSimDataSets = 1000, nPatDataSets = 50, beta = 0,
  a_WB = 0.86, b_WB = 1.77, a_LL = 1.06, b_LL = 1.14,
  events = 0.8, seed = 2019
)

# beta = 0.8
LLAH186 <- SimAddHaz(
  TrueModel = "Log-logistic Additive Hazard Model",
  nSimDataSets = 1000, nPatDataSets = 200, beta = 0.8,
  a_WB = 0.86, b_WB = 1.77, a_LL = 1.06, b_LL = 1.14,
  events = 0.6, seed = 1020
)
LLAH188 <- SimAddHaz(
  TrueModel = "Log-logistic Additive Hazard Model",
  nSimDataSets = 1000, nPatDataSets = 200, beta = 0.8,
  a_WB = 0.86, b_WB = 1.77, a_LL = 1.06, b_LL = 1.14,
  events = 0.8, seed = 1021
)
LLAH586 <- SimAddHaz(
  TrueModel = "Log-logistic Additive Hazard Model",
  nSimDataSets = 1000, nPatDataSets = 50, beta = 0.8,
  a_WB = 0.86, b_WB = 1.77, a_LL = 1.06, b_LL = 1.14,
  events = 0.6, seed = 1032
)
LLAH588 <- SimAddHaz(
  TrueModel = "Log-logistic Additive Hazard Model",
  nSimDataSets = 1000, nPatDataSets = 50, beta = 0.8,
  a_WB = 0.86, b_WB = 1.77, a_LL = 1.06, b_LL = 1.14,
  events = 0.8, seed = 1033
)

# beta = 1.6
LLAH166 <- SimAddHaz(
  TrueModel = "Log-logistic Additive Hazard Model",
  nSimDataSets = 1000, nPatDataSets = 200, beta = 1.6,
  a_WB = 0.86, b_WB = 1.77, a_LL = 1.06, b_LL = 1.14,
  events = 0.6, seed = 1024
)
LLAH168 <- SimAddHaz(
  TrueModel = "Log-logistic Additive Hazard Model",
  nSimDataSets = 1000, nPatDataSets = 200, beta = 1.6,
  a_WB = 0.86, b_WB = 1.77, a_LL = 1.06, b_LL = 1.14,
  events = 0.8, seed = 3025
)
LLAH566 <- SimAddHaz(
  TrueModel = "Log-logistic Additive Hazard Model",
  nSimDataSets = 1000, nPatDataSets = 50, beta = 1.6,
  a_WB = 0.86, b_WB = 1.77, a_LL = 1.06, b_LL = 1.14,
  events = 0.6, seed = 1026
)
LLAH568 <- SimAddHaz(
  TrueModel = "Log-logistic Additive Hazard Model",
  nSimDataSets = 1000, nPatDataSets = 50, beta = 1.6,
  a_WB = 0.86, b_WB = 1.77, a_LL = 1.06, b_LL = 1.14,
  events = 0.8, seed = 1027
)

# Stack the first element of every simulation result row-wise into one table.
# Order: all Weibull settings first, then all log-logistic settings.
final.results <- do.call(
  rbind,
  lapply(
    list(
      WBAH106, WBAH108, WBAH506, WBAH508,
      WBAH186, WBAH188, WBAH586, WBAH588,
      WBAH166, WBAH168, WBAH566, WBAH568,
      LLAH106, LLAH108, LLAH506, LLAH508,
      LLAH186, LLAH188, LLAH586, LLAH588,
      LLAH166, LLAH168, LLAH566, LLAH568
    ),
    function(run) run[[1]]
  )
)

# Long-format bias table for the plots.
# expand_ID repeats each of the 24 entries in the first column of final.results
# 1000 times (hard-coded: 24 settings x 1000 simulated data sets), so that it
# lines up row by row with the stacked second elements of the simulation results.
expand_ID <- lapply(final.results[1], rep, times = rep(1000, 24))
final.bias <- cbind(
  expand_ID,
  do.call(
    rbind,
    lapply(
      list(
        WBAH106, WBAH108, WBAH506, WBAH508,
        WBAH186, WBAH188, WBAH586, WBAH588,
        WBAH166, WBAH168, WBAH566, WBAH568,
        LLAH106, LLAH108, LLAH506, LLAH508,
        LLAH186, LLAH188, LLAH586, LLAH588,
        LLAH166, LLAH168, LLAH566, LLAH568
      ),
      function(run) run[[2]]
    )
  )
)
# Columns 2-4 hold one value per fitted model.
names(final.bias)[2:4] <- c("WB", "LL", "YL")

# Long-format MSE table for the plots: the expanded setting labels bound to the
# stacked third elements of the simulation results.
final.mse <- cbind(
  expand_ID,
  do.call(
    rbind,
    lapply(
      list(
        WBAH106, WBAH108, WBAH506, WBAH508,
        WBAH186, WBAH188, WBAH586, WBAH588,
        WBAH166, WBAH168, WBAH566, WBAH568,
        LLAH106, LLAH108, LLAH506, LLAH508,
        LLAH186, LLAH188, LLAH586, LLAH588,
        LLAH166, LLAH168, LLAH566, LLAH568
      ),
      function(run) run[[3]]
    )
  )
)

# Columns 2-4 hold one value per fitted model.
names(final.mse)[2:4] <- c("WB", "LL", "YL")

# Long-format coverage table for the plots: the expanded setting labels bound to
# the stacked fourth elements of the simulation results.
final.coverage <- cbind(
  expand_ID,
  do.call(
    rbind,
    lapply(
      list(
        WBAH106, WBAH108, WBAH506, WBAH508,
        WBAH186, WBAH188, WBAH586, WBAH588,
        WBAH166, WBAH168, WBAH566, WBAH568,
        LLAH106, LLAH108, LLAH506, LLAH508,
        LLAH186, LLAH188, LLAH586, LLAH588,
        LLAH166, LLAH168, LLAH566, LLAH568
      ),
      function(run) run[[4]]
    )
  )
)

# Columns 2-4 hold one value per fitted model.
names(final.coverage)[2:4] <- c("WB", "LL", "YL")

# Long-format convergence table for the plots: the expanded setting labels bound
# to the stacked fifth elements of the simulation results.
final.convergence <- cbind(
  expand_ID,
  do.call(
    rbind,
    lapply(
      list(
        WBAH106, WBAH108, WBAH506, WBAH508,
        WBAH186, WBAH188, WBAH586, WBAH588,
        WBAH166, WBAH168, WBAH566, WBAH568,
        LLAH106, LLAH108, LLAH506, LLAH508,
        LLAH186, LLAH188, LLAH586, LLAH588,
        LLAH166, LLAH168, LLAH566, LLAH568
      ),
      function(run) run[[5]]
    )
  )
)

# Columns 2-4 hold one value per fitted model.
names(final.convergence)[2:4] <- c("WB", "LL", "YL")

# Turn the ID column of all four long tables into an ordered factor with one
# fixed level order (log-logistic settings first, then Weibull settings; within
# each model beta 1.6 -> 0.8 -> 0, participants 50 -> 200, events 0.8 -> 0.6).
# The level vector is spelled out once for final.bias; the other three tables
# reuse exactly those levels via levels(final.bias$ID).
final.bias$ID <- factor(
  final.bias$ID,
  levels = c(
    "LLAH_1.6_50_0.8", "LLAH_1.6_50_0.6", "LLAH_1.6_200_0.8", "LLAH_1.6_200_0.6",
    "LLAH_0.8_50_0.8", "LLAH_0.8_50_0.6", "LLAH_0.8_200_0.8", "LLAH_0.8_200_0.6",
    "LLAH_0_50_0.8", "LLAH_0_50_0.6", "LLAH_0_200_0.8", "LLAH_0_200_0.6",
    "WBAH_1.6_50_0.8", "WBAH_1.6_50_0.6", "WBAH_1.6_200_0.8", "WBAH_1.6_200_0.6",
    "WBAH_0.8_50_0.8", "WBAH_0.8_50_0.6", "WBAH_0.8_200_0.8", "WBAH_0.8_200_0.6",
    "WBAH_0_50_0.8", "WBAH_0_50_0.6", "WBAH_0_200_0.8", "WBAH_0_200_0.6"
  ),
  ordered = TRUE
)

final.mse$ID <- factor(
  final.mse$ID,
  levels = levels(final.bias$ID),
  ordered = TRUE
)

final.coverage$ID <- factor(
  final.coverage$ID,
  levels = levels(final.bias$ID),
  ordered = TRUE
)

final.convergence$ID <- factor(
  final.convergence$ID,
  levels = levels(final.bias$ID),
  ordered = TRUE
)
                          
# Export the stacked summary table as a semicolon-separated text file with "."
# as decimal mark and without row names.
write.table(
  final.results,
  file = "simulation_results_220824.csv",
  sep = ";",
  dec = ".",
  row.names = FALSE
)

# Persist the current workspace to an .RData file.
save.image(file = "simulation_results_220824.RData")

##------------------------------------------------------------------------------
##
## Plot results (MSE, bias) for each setting 
##
##------------------------------------------------------------------------------
# Bias: three horizontal box-plot panels in one PNG -----------------------------
# TIFF alternative (disabled):
#tiff("./Overall_bias.tiff", width=1417, height=1417, res=300, compression = "lzw", pointsize=9)
png(file = "./Overall_bias.png", width = 1417, height = 1417, res = 300)
par(
  mfrow = c(1, 3),
  mai = c(0.7, 0.1, 0.5, 0.1),
  oma = c(0, 8.5, 0, 0)
)
# All panels place the 24 boxes at 0-11 and 14-25, leaving a gap between the
# 12th and 13th box.
# Panel 1: WB column; the only panel that keeps its y axis.
boxplot(
  WB ~ ID,
  data = final.bias,
  horizontal = TRUE,
  at = c(0:11, 14:25),
  las = 1,
  main = "WB",
  xlab = ""
)
# Panel 2: LL column; y axis suppressed, carries the shared x-axis label.
boxplot(
  LL ~ ID,
  data = final.bias,
  horizontal = TRUE,
  at = c(0:11, 14:25),
  las = 1,
  main = "LL",
  xlab = "Bias of estimated ",
  ylab = "",
  yaxt = "n"
)
# Panel 3: YL column (titled "LY"); y axis suppressed.
boxplot(
  YL ~ ID,
  data = final.bias,
  horizontal = TRUE,
  at = c(0:11, 14:25),
  las = 1,
  main = "LY",
  xlab = "",
  ylab = "",
  yaxt = "n"
)
dev.off()

# MSE: three horizontal box-plot panels in one PNG ------------------------------
# TIFF alternative (disabled):
#tiff("./Overall_mse.tiff", width=1417, height=1417, res=300, compression = "lzw", pointsize=9)
png(file = "./Overall_mse.png", width = 1417, height = 1417, res = 300)
par(
  mfrow = c(1, 3),
  mai = c(0.7, 0.1, 0.5, 0.1),
  oma = c(0, 8.5, 0, 0)
)
# All panels place the 24 boxes at 0-11 and 14-25, leaving a gap between the
# 12th and 13th box.
# Panel 1: WB column; the only panel that keeps its y axis.
boxplot(
  WB ~ ID,
  data = final.mse,
  horizontal = TRUE,
  at = c(0:11, 14:25),
  las = 1,
  main = "WB",
  xlab = ""
)
# Panel 2: LL column; y axis suppressed, carries the shared x-axis label.
boxplot(
  LL ~ ID,
  data = final.mse,
  horizontal = TRUE,
  at = c(0:11, 14:25),
  las = 1,
  main = "LL",
  xlab = "MSE of estimated ",
  ylab = "",
  yaxt = "n"
)
# Panel 3: YL column (titled "LY"); y axis suppressed.
boxplot(
  YL ~ ID,
  data = final.mse,
  horizontal = TRUE,
  at = c(0:11, 14:25),
  las = 1,
  main = "LY",
  xlab = "",
  ylab = "",
  yaxt = "n"
)
dev.off()

# overall bar plot of coverage -------------------------------------------------
# For every setting (ID), compute the share of rows falling into each observed
# value of the coverage column. prop.table(..., margin = 1) normalises each ID
# row to sum to 1; t() then puts the settings in columns, as barplot() expects
# one stacked bar per column.
cov_WB <- t(prop.table(table(final.coverage$ID, final.coverage$WB), margin = 1))
cov_LL <- t(prop.table(table(final.coverage$ID, final.coverage$LL), margin = 1))
cov_YL <- t(prop.table(table(final.coverage$ID, final.coverage$YL), margin = 1))

png(file = "./Overall_coverage_freq.png", width = 1417, height = 1417, res = 300)
par(mfrow = c(1, 3), mai = c(0.7, 0.1, 0.5, 0.1), oma = c(0, 8.5, 0, 0))
# In every panel the rows are reversed so that the last coverage value is drawn
# first (dark segment). drop = FALSE keeps a matrix even if only one coverage
# value was observed; otherwise barplot() would get a plain vector and alternate
# the two colours across bars. The spacing vector inserts a wider gap before the
# 13th bar.
# WB
barplot(cov_WB[nrow(cov_WB):1, , drop = FALSE], horiz = TRUE, las = 1,
        col = c("grey18", "grey90"),
        space = c(rep(1, 12), 4, rep(1, 11)))
title("WB", line = 1)
# LL
barplot(cov_LL[nrow(cov_LL):1, , drop = FALSE], horiz = TRUE, las = 1,
        col = c("grey18", "grey90"),
        space = c(rep(1, 12), 4, rep(1, 11)),
        yaxt = "n", xlab = "Empirical coverage")
title("LL", line = 1)
# YL column; titled "LY" to match the third panel of the bias and MSE figures
barplot(cov_YL[nrow(cov_YL):1, , drop = FALSE], horiz = TRUE, las = 1,
        col = c("grey18", "grey90"), yaxt = "n",
        space = c(rep(1, 12), 4, rep(1, 11)))
title("LY", line = 1)
dev.off()
