
## Shared text-size overrides for ggplot figures: 20 pt for axis text, axis
## titles and the (centred) plot title; 18 pt for facet strips and legend text;
## legend drawn without a background box.
myTheme <- theme(
  axis.text = element_text(size = 20),
  axis.title = element_text(size = 20),
  plot.title = element_text(size = 20, hjust = 0.5),
  legend.text = element_text(size = 18),
  strip.text = element_text(size = 18, hjust = 0.5),
  legend.title = element_text(size = 18),
  legend.background = element_blank()
)

# Make theme_bw() the default ggplot theme for the rest of the plots.
theme_set(theme_bw())

# Turn numbers into plotmath expressions in scientific notation, e.g. for use
# as axis labels.
#
# l: values to label (anything `format(l, scientific = TRUE)` accepts).
# Returns the expression vector produced by `parse(text = ...)`, one entry per
# element of `l`; a mantissa of exactly 1 is dropped so 1e+04 becomes 10^4.
fancy_scientific <- function(l) {
  # Ordered (pattern, replacement) pairs; each is applied to the output of the
  # previous one, so the order matters.
  rewrite_rules <- list(
    # quote the part before the exponent to keep all the digits
    c("^(.*)e", "'\\1'e"),
    # drop the '+' after the exponent marker, if present (e+2 -> e2)
    c("e\\+", "e"),
    # turn the 'e' into plotmath multiplication by a power of ten
    c("e", "%*%10^"),
    # collapse 1 x 10^k (also 1.000 x 10^k) to plain 10^k
    c("\\'1[\\.0]*\\'\\%\\*\\%", "")
  )

  label_text <- Reduce(
    function(text_so_far, rule) gsub(rule[1], rule[2], text_so_far),
    rewrite_rules,
    init = format(l, scientific = TRUE)
  )

  # hand the result back as an expression so it is rendered as plotmath
  parse(text = label_text)
}

# Every multiple 1..10 of each power of ten from 10^4 to 10^8, flattened to a
# plain numeric vector (presumably used as minor breaks on log10 axes).
log10minorbreaks <- as.numeric(outer(1:10, 10^(4:8)))


###
####
###
## loading observed data sets for plotting

# Adds the `ageBMT_bin` column: rows are labelled by `age.at.BMT` using the
# cut points 56, 70 and 84 (each bin includes its upper cut point; everything
# above 84 is 'agebin4').
add_age_bin <- function(obs) {
  mutate(
    obs,
    ageBMT_bin = ifelse(
      age.at.BMT <= 56, "agebin1",
      ifelse(
        age.at.BMT <= 70, "agebin2",
        ifelse(age.at.BMT <= 84, "agebin3", "agebin4")
      )
    )
  )
}

# Cell counts, ordered by age at sampling.
counts_file <- file.path("data", "Counts_Treg.csv")
counts_data <- read.csv(counts_file) %>%
  arrange(age.at.S1K) %>%
  add_age_bin()

# Nfd data, ordered by age at sampling; columns whose name contains "chi" are
# dropped after binning.
Nfd_file <- file.path("data", "Nfd_Treg.csv")
Nfd_data <- read.csv(Nfd_file) %>%
  arrange(age.at.S1K) %>%
  add_age_bin() %>%
  select(-contains("chi"))

# Ki67 data for the host compartment.
hostki_file <- file.path("data", "hostKi67_Treg.csv")
hostki_data <- read.csv(hostki_file) %>%
  arrange(age.at.S1K) %>%
  add_age_bin() %>%
  mutate(subcomp = "Host")

# Ki67 data for the donor compartment.
donorki_file <- file.path("data", "donorKi67_Treg.csv")
donorki_data <- read.csv(donorki_file) %>%
  arrange(age.at.S1K) %>%
  add_age_bin() %>%
  mutate(subcomp = "Donor")

## merged ki data (donor rows first, then host rows)
ki_data <- rbind(donorki_data, hostki_data)

###
####
###

# Time sequences for predictions, one per age bin within the data.
# Each grid has 300 points evenly spaced on the log10 scale and ends at 450.
ts_pred1 <- 10^seq(from = log10(66), to = log10(450), length.out = 300)
ts_pred2 <- 10^seq(from = log10(91), to = log10(450), length.out = 300)
ts_pred3 <- 10^seq(from = log10(90), to = log10(450), length.out = 300)
ts_pred4 <- 10^seq(from = log10(174), to = log10(450), length.out = 300)
# Alternative grids kept for reference; each starts at the matching tb_pred value:
# ts_pred1 <- 10^seq(log10(49), log10(450), length.out = 300)
# ts_pred2 <- 10^seq(log10(66), log10(450), length.out = 300)
# ts_pred3 <- 10^seq(log10(77), log10(450), length.out = 300)
# ts_pred4 <- 10^seq(log10(128), log10(450), length.out = 300)

# One constant per bin, repeated to the grid length so it can be subtracted
# element-wise from the matching ts_pred grid
# (presumably the age at BMT assumed for the bin -- TODO confirm).
tb_pred1 <- rep(49, times = 300)
tb_pred2 <- rep(66, times = 300)
tb_pred3 <- rep(77, times = 300)
tb_pred4 <- rep(128, times = 300)

# All four grids stacked in bin order.
ts_pred <- c(ts_pred1, ts_pred2, ts_pred3, ts_pred4)


# Summarise posterior draws from a fitted model into the data frames used for
# plotting, and publish them as global variables.
#
# fitobject: a fit for which `as.data.frame(fitobject, pars = ...)` returns one
#            column per predicted quantity (presumably an rstan `stanfit` --
#            TODO confirm).
#
# Reads the file-level objects ts_pred1..4, tb_pred1..4, counts_data, Nfd_data
# and ki_data.
#
# Side effects (global assignments via `<<-`):
#   Counts_pred, Nfd_pred, ki_naive_pred  -- 5%/50%/95% quantiles of the
#       "*_mean_pred*" quantities on the prediction grids;
#   Counts_naive_sigma_obs, Nfd_naive_sigma_obs, ki_donor_naive_sigma_obs,
#   ki_host_naive_sigma_obs  -- 16%/50%/84% quantiles of the "*_pred*"
#       quantities at the grid point nearest to each observation.
#
# Returns (invisibly) the value assigned to ki_naive_pred.
plot_function <- function(fitobject) {
  # Prediction grid for each age-at-BMT bin.
  pred_grids <- list(agebin1 = ts_pred1, agebin2 = ts_pred2,
                     agebin3 = ts_pred3, agebin4 = ts_pred4)
  # The four grids stacked in bin order ...
  stacked_grid <- c(ts_pred1, ts_pred2, ts_pred3, ts_pred4)
  # ... and the same grids shifted by the per-bin tb_pred constant.
  stacked_grid_shifted <- c(ts_pred1 - tb_pred1,
                            ts_pred2 - tb_pred2,
                            ts_pred3 - tb_pred3,
                            ts_pred4 - tb_pred4)

  # Quantile summary (lb / median / ub) of the requested quantities, one row
  # per column of the draws, with the time grid and labels attached.
  # `times` must have one entry per column, in column order.
  summarise_draws <- function(pars, lower_prob, upper_prob, times,
                              location_label, subcomp_label = NULL) {
    draws_summary <- as.data.frame(fitobject, pars = pars) %>%
      # factor_key = TRUE keeps the keys as a factor in column order, so the
      # grouped summary rows stay in column order and line up with `times`.
      gather(factor_key = TRUE) %>%
      group_by(key) %>%
      summarize(lb = quantile(value, probs = lower_prob),
                median = quantile(value, probs = 0.5),
                ub = quantile(value, probs = upper_prob)) %>%
      bind_cols("timeseries" = times) %>%
      # the bin is recovered from the "predN" part of the column name
      mutate(ageBMT_bin = ifelse(grepl("pred1", key), "agebin1",
                                 ifelse(grepl("pred2", key), "agebin2",
                                        ifelse(grepl("pred3", key), "agebin3", "agebin4"))),
             location = location_label)
    if (!is.null(subcomp_label)) {
      draws_summary <- mutate(draws_summary, subcomp = subcomp_label)
    }
    draws_summary
  }

  # Grid value closest to `test`. Always a single number: the first grid point
  # wins on a tie, and NA is returned when no distance can be computed
  # (e.g. `test` is NA).
  find_nearest_time <- function(test, target_vec) {
    nearest_index <- which.min(abs(target_vec - test))
    if (length(nearest_index) == 0L) {
      return(NA_real_)
    }
    target_vec[nearest_index]
  }

  # For every observation in `obs_data`, take the grid point of its bin that
  # is nearest to `age.at.S1K`, then look up the matching row of `withsigma`.
  # Rows come out bin by bin, in the row order of `obs_data` within a bin;
  # rows without a bin label are skipped.
  sigma_at_observed <- function(obs_data, withsigma) {
    nearest_by_bin <- lapply(names(pred_grids), function(bin) {
      obs_ages <- obs_data$age.at.S1K[obs_data$ageBMT_bin %in% bin]
      # vapply guarantees a plain numeric vector, also for an empty bin
      vapply(obs_ages, find_nearest_time, numeric(1),
             target_vec = pred_grids[[bin]])
    })
    data.frame(ageBMT_bin = rep(names(pred_grids), lengths(nearest_by_bin)),
               timeseries = as.numeric(unlist(nearest_by_bin))) %>%
      left_join(withsigma, by = c("ageBMT_bin", "timeseries"))
  }

  #### counts

  # counts_naive_mean_pred*: 5%-95% (90%) envelope on the prediction grids
  Counts_naive_pred <- summarise_draws(
    pars = c("counts_naive_mean_pred1", "counts_naive_mean_pred2",
             "counts_naive_mean_pred3", "counts_naive_mean_pred4"),
    lower_prob = 0.05, upper_prob = 0.95,
    times = stacked_grid, location_label = "Thymus")

  # counts_naive_pred*: 16%-84% (68%) envelope on the prediction grids
  Counts_naive_withsigma <- summarise_draws(
    pars = c("counts_naive_pred1", "counts_naive_pred2",
             "counts_naive_pred3", "counts_naive_pred4"),
    lower_prob = 0.16, upper_prob = 0.84,
    times = stacked_grid, location_label = "Thymus")

  Counts_naive_sigma_obs <<- sigma_at_observed(counts_data, Counts_naive_withsigma)

  #### Nfd

  # Nfd_naive_mean_pred3 only: 5%-95% envelope against the shifted bin-3 grid
  Nfd_naive_pred <- summarise_draws(
    pars = c("Nfd_naive_mean_pred3"),
    lower_prob = 0.05, upper_prob = 0.95,
    times = ts_pred3 - tb_pred3, location_label = "Naive")

  # Nfd_naive_pred*: 16%-84% envelope. Keyed on the unshifted grids because
  # the observation times it is joined to below are picked from those grids;
  # keying it on the shifted grids made the join miss every row.
  Nfd_naive_withsigma <- summarise_draws(
    pars = c("Nfd_naive_pred1", "Nfd_naive_pred2",
             "Nfd_naive_pred3", "Nfd_naive_pred4"),
    lower_prob = 0.16, upper_prob = 0.84,
    times = stacked_grid, location_label = "Naive")

  Nfd_naive_sigma_obs <<- sigma_at_observed(Nfd_data, Nfd_naive_withsigma)

  #### Ki67

  # ki_donor_naive_mean_pred*: 5%-95% envelope against the shifted grids
  ki_donor_naive_pred <- summarise_draws(
    pars = c("ki_donor_naive_mean_pred1", "ki_donor_naive_mean_pred2",
             "ki_donor_naive_mean_pred3", "ki_donor_naive_mean_pred4"),
    lower_prob = 0.05, upper_prob = 0.95,
    times = stacked_grid_shifted,
    location_label = "Naive", subcomp_label = "Donor")

  # ki_donor_naive_pred3 only: 16%-84% envelope on the bin-3 grid.
  # NOTE(review): only bin 3 is summarised here, so observations from the
  # other bins get NA after the join below -- confirm this is intended.
  ki_donor_naive_withsigma <- summarise_draws(
    pars = c("ki_donor_naive_pred3"),
    lower_prob = 0.16, upper_prob = 0.84,
    times = ts_pred3,
    location_label = "Naive", subcomp_label = "Donor")

  # Observation times come from the merged donor + host table (ki_data) for
  # both the donor and the host lookup.
  ki_donor_naive_sigma_obs <<- sigma_at_observed(ki_data, ki_donor_naive_withsigma)

  # ki_host_naive_mean_pred*: 5%-95% envelope against the shifted grids.
  # NOTE(review): labelled "Thymus" while the donor counterpart is "Naive".
  ki_host_naive_pred <- summarise_draws(
    pars = c("ki_host_naive_mean_pred1", "ki_host_naive_mean_pred2",
             "ki_host_naive_mean_pred3", "ki_host_naive_mean_pred4"),
    lower_prob = 0.05, upper_prob = 0.95,
    times = stacked_grid_shifted,
    location_label = "Thymus", subcomp_label = "Host")

  # ki_host_naive_pred*: 16%-84% envelope on the prediction grids
  ki_host_naive_withsigma <- summarise_draws(
    pars = c("ki_host_naive_pred1", "ki_host_naive_pred2",
             "ki_host_naive_pred3", "ki_host_naive_pred4"),
    lower_prob = 0.16, upper_prob = 0.84,
    times = stacked_grid,
    location_label = "Naive", subcomp_label = "Host")

  ki_host_naive_sigma_obs <<- sigma_at_observed(ki_data, ki_host_naive_withsigma)

  # publish the envelopes of the mean predictions
  Counts_pred <<- Counts_naive_pred
  Nfd_pred <<- Nfd_naive_pred
  ki_naive_pred <<- rbind(ki_donor_naive_pred, ki_host_naive_pred)

}



