# Print F_true as it currently stands in the workspace.
# NOTE(review): F_true, seed, n, k, selection, m, depth and overdispersion are
# not assigned anywhere above this point in the visible file -- presumably
# they are left over from earlier in the session; confirm before sourcing.
F_true
# The overdispersion of the negative binomial gives us the difference in
# probability between the event and its negation.
set.seed(seed)
# Firstly, create random topology
B <- create_B(n, k)
# Assign proportions to each clone
clone_proportions <- calc_clone_proportions(B, selection)
# Place clones in 1D space
density_coords <- place_clones_space(B)
# Create U matrix
U <- create_U(
  B = B,
  clone_proportions = clone_proportions,
  density_coords = density_coords,
  m = m
)
F_true <- calc_F(U = U, B = B, heterozygous = FALSE)
F_true
# Entries that equal 1 once rounded to 15 decimals (i.e. that differ from 1
# only from the 16th decimal on) are set to exactly 1.
F_true[round(F_true, digits = 15) == 1] <- 1
F_true
# F must be assigned before it is used to size the depth draw: until then `F`
# is either a stale value from a previous run or R's built-in alias for FALSE
# (length 1), which would yield a single depth value.
F <- F_true
# One negative-binomial depth per entry of F (mean `depth`, size
# `overdispersion`). seq_along() is also safe when F is empty.
depth_values <- map_dbl(
  seq_along(F),
  function(x) rnbinom(n = 1, mu = depth, size = overdispersion)
)
depth_values
# Regenerate the instance from `seed` and derive a noisy VAF matrix from it.
# NOTE(review): seed, n, k, selection, m, depth and overdispersion, and the
# create_*/calc_* helpers, are defined outside the visible part of this file.
set.seed(seed)
# Firstly, create random topology
B <- create_B(n, k)
# Assign proportions to each clone
clone_proportions <- calc_clone_proportions(B, selection)
# Place clones in 1D space
density_coords <- place_clones_space(B)
# Create U matrix
U <- create_U(
  B = B,
  clone_proportions = clone_proportions,
  density_coords = density_coords,
  m = m
)
F_true <- calc_F(U = U, B = B, heterozygous = FALSE)
# Entries that equal 1 once rounded to 15 decimals are set to exactly 1.
F_true[round(F_true, digits = 15) == 1] <- 1
F_true
F <- F_true
# One negative-binomial depth per entry of F (mean `depth`, size
# `overdispersion`). seq_along() replaces 1:length(F), which would iterate
# over c(1, 0) if F were empty.
depth_values <- map_dbl(
  seq_along(F),
  function(x) rnbinom(n = 1, mu = depth, size = overdispersion)
)
depth_values
# Alternative-allele reads: binomial with success probability F[x] out of
# depth_values[x] reads.
alt_allele_counts <- map_dbl(
  seq_along(F),
  function(x) rbinom(n = 1, prob = F[x], size = depth_values[x])
)
alt_allele_counts
#ref_allele_counts <- map_dbl(1:length(F), function(x) rbinom(n = 1, prob = 1-F[x], size = depth_values[x]))
# Binomial draw with probability 0.001 out of the alternative reads; these are
# subtracted from the alternative count below.
# NOTE(review): 0.001 presumably models a per-read sequencing error rate --
# confirm.
alt_allele_mismatch_counts <- map_dbl(
  seq_along(F),
  function(x) rbinom(n = 1, prob = 0.001, size = alt_allele_counts[x])
)
alt_allele_mismatch_counts
# Binomial draw with probability 0.001/3 out of the non-alternative reads;
# these are added to the alternative count below.
# ref_allele_counts is not used as the size here because the site may carry
# another allele, and a change from that allele to the alternative one would
# also add to the alternative reads.
ref_to_alt_allele_mismatch_counts <- map_dbl(
  seq_along(F),
  function(x) rbinom(n = 1, prob = 0.001 / 3, size = depth_values[x] - alt_allele_counts[x])
)
ref_to_alt_allele_mismatch_counts
# Noisy VAF = (alt - alt lost to mismatch + gained from mismatch) / depth,
# reshaped column-wise to the row count of F.
noisy_vafs <- matrix(
  (alt_allele_counts - alt_allele_mismatch_counts + ref_to_alt_allele_mismatch_counts) / depth_values,
  byrow = FALSE,
  nrow = nrow(F)
)
noisy_vafs
depth_values
is.na(noisy_vafs)
# For low depth values it can happen that we get depth values of 0s and so we
# get a NaN after dividing by 0 in the noisy VAF matrix. That would mean a VAF
# of 0. Hence, we'll convert those NaN values to 0s.
noisy_vafs[is.na(noisy_vafs)] <- 0
noisy_vafs[7,]
# Generate and save a single instance (n = 50, m = 10, k = 0.5, neutral, noisy).
# The mistyped line `n <. 50` that preceded the assignment was a parse error
# ("unexpected numeric constant") and is dropped so the file can be sourced.
n <- 50
m <- 10
k <- 0.5
selection <- "neutral"
noise <- TRUE
depth <- 30
rep <- 3
source("/Users/maitena/Research/clonalDeconvolution/my_algorithm/code/data/generate_instances.R")
root_outdir <- "/Users/maitena/Research/clonalDeconvolution/my_algorithm/data/second_experimentation/parameter_tuning"
# The factor code is 1 for "positive" and 2 for "neutral", so the selection
# regime shifts the seed by 100 or 200.
seed <- n*(k+1) + rep + 100*as.numeric(factor(selection, levels = c("positive", "neutral")))
instance <- create_instance(n = n, m = m, k = k, selection = selection, noisy = noise, depth = depth, seed = seed)
# Count missing values in the noisy and the true matrices, then inspect both.
# The abbreviated `instance$F_` is not kept: `$` partial-matches list names,
# so its result depends on which element names happen to start with "F_".
sum(is.na(instance$F))
sum(is.na(instance$F_true))
instance$F_true
instance$F
filename <- sprintf("n-%s_m-%s_k-%s_sel-%s_noisy-%s_depth-%s_rep-%s_seed-%s", n, m, k, selection, noise, depth, rep, seed)
filename
filepath <- file.path(root_outdir, filename)
filepath
# recursive = TRUE also creates root_outdir when it is missing;
# showWarnings = FALSE keeps reruns quiet when the directory already exists.
dir.create(filepath, recursive = TRUE, showWarnings = FALSE)
saveRDS(instance, file = file.path(filepath, sprintf("%s.RDS", filename)))
# NOTE(review): write.matrix is not defined in this file -- presumably
# MASS::write.matrix attached by generate_instances.R; confirm.
write.matrix(instance[["F"]], file = file.path(filepath, "F.txt"))
write.matrix(instance[["B"]], file = file.path(filepath, "B.txt"))
# Generate and save a single instance (n = 50, m = 10, k = 5, neutral, noisy).
n <- 50
m <- 10
k <- 5
selection <- "neutral"
noise <- TRUE
depth <- 30
rep <- 5
source("/Users/maitena/Research/clonalDeconvolution/my_algorithm/code/data/generate_instances.R")
root_outdir <- "/Users/maitena/Research/clonalDeconvolution/my_algorithm/data/second_experimentation/parameter_tuning"
# The factor code is 1 for "positive" and 2 for "neutral", so the selection
# regime shifts the seed by 100 or 200.
seed <- n*(k+1) + rep + 100*as.numeric(factor(selection, levels = c("positive", "neutral")))
instance <- create_instance(n = n, m = m, k = k, selection = selection, noisy = noise, depth = depth, seed = seed)
# Count missing values in the noisy and the true matrices, then inspect F.
sum(is.na(instance$F))
sum(is.na(instance$F_true))
instance$F
filename <- sprintf("n-%s_m-%s_k-%s_sel-%s_noisy-%s_depth-%s_rep-%s_seed-%s", n, m, k, selection, noise, depth, rep, seed)
filepath <- file.path(root_outdir, filename)
# recursive = TRUE also creates root_outdir when it is missing;
# showWarnings = FALSE keeps reruns quiet when the directory already exists.
dir.create(filepath, recursive = TRUE, showWarnings = FALSE)
saveRDS(instance, file = file.path(filepath, sprintf("%s.RDS", filename)))
write.matrix(instance[["F"]], file = file.path(filepath, "F.txt"))
write.matrix(instance[["B"]], file = file.path(filepath, "B.txt"))
# Generate and save a single instance (n = 50, m = 4, k = 0.5, neutral, noisy).
n <- 50
m <- 4
k <- 0.5
selection <- "neutral"
noise <- TRUE
depth <- 30
rep <- 3
source("/Users/maitena/Research/clonalDeconvolution/my_algorithm/code/data/generate_instances.R")
root_outdir <- "/Users/maitena/Research/clonalDeconvolution/my_algorithm/data/second_experimentation/parameter_tuning"
# The factor code is 1 for "positive" and 2 for "neutral", so the selection
# regime shifts the seed by 100 or 200.
seed <- n*(k+1) + rep + 100*as.numeric(factor(selection, levels = c("positive", "neutral")))
instance <- create_instance(n = n, m = m, k = k, selection = selection, noisy = noise, depth = depth, seed = seed)
# Count missing values in the noisy matrix.
sum(is.na(instance$F))
filename <- sprintf("n-%s_m-%s_k-%s_sel-%s_noisy-%s_depth-%s_rep-%s_seed-%s", n, m, k, selection, noise, depth, rep, seed)
filepath <- file.path(root_outdir, filename)
# recursive = TRUE also creates root_outdir when it is missing;
# showWarnings = FALSE keeps reruns quiet when the directory already exists.
dir.create(filepath, recursive = TRUE, showWarnings = FALSE)
saveRDS(instance, file = file.path(filepath, sprintf("%s.RDS", filename)))
write.matrix(instance[["F"]], file = file.path(filepath, "F.txt"))
write.matrix(instance[["B"]], file = file.path(filepath, "B.txt"))
# Scratch arithmetic typed at the console; results are printed, not stored.
# 7200/5 is 1440, so the 1440*k lines repeat the 7200/5*k ones.
# NOTE(review): what 7200, 261 and 1440 stand for is not shown in this file.
7200/5
7200/5*3
7200/5*4
261*2
1440*2
1440*3
1440*4
# Generate and save one instance per combination of the parameter grid below.
source("/Users/maitena/Research/clonalDeconvolution/my_algorithm/code/data/generate_instances.R")
n_vals <- c(10, 25, 50, 100)
k_vals <- c(0.4, 1, 5, 10)
selection_vals <- c("positive", "neutral")
m_vals <- c(2, 4, 6, 10)
noise_vals <- c(FALSE, TRUE)
depth_vals <- c(30, 100, 10000)
reps <- 1:10 # 5 repetitions for tuning, 10 for evaluation
root_outdir <- "/Users/maitena/Research/clonalDeconvolution/my_algorithm/data/second_experimentation/evaluation" # change here for tuning and eval
seed <- 0 # placeholder; overwritten on every iteration below
# .Options
for (n in n_vals) {
  for (k in k_vals) {
    for (selection in selection_vals) {
      for (m in m_vals) {
        for (noise in noise_vals) {
          for (rep in reps) {
            # The seed depends on n, k, rep and selection only (not on m,
            # noise or depth), so it is computed once per repetition.
            # Scaling factor: 100 for tuning, 200 for evaluation.
            seed <- n*(k+1) + rep + 200*as.numeric(factor(selection, levels = c("positive", "neutral")))
            if (noise) {
              # Noisy instances: one per sequencing depth.
              for (depth in depth_vals) {
                instance <- create_instance(n = n, m = m, k = k, selection = selection, noisy = noise, depth = depth, seed = seed)
                filename <- sprintf("n-%s_m-%s_k-%s_sel-%s_noisy-%s_depth-%s_rep-%s_seed-%s", n, m, k, selection, noise, depth, rep, seed)
                filepath <- file.path(root_outdir, filename)
                # recursive: also create root_outdir if missing;
                # showWarnings = FALSE: stay quiet when rerun over existing dirs.
                dir.create(filepath, recursive = TRUE, showWarnings = FALSE)
                saveRDS(instance, file = file.path(filepath, sprintf("%s.RDS", filename)))
                write.matrix(instance[["F"]], file = file.path(filepath, "F.txt"))
                write.matrix(instance[["B"]], file = file.path(filepath, "B.txt"))
              }
            } else {
              # Noise-free instance: no depth argument is passed, and the
              # file name records the depth as NA.
              instance <- create_instance(n = n, m = m, k = k, selection = selection, noisy = noise, seed = seed)
              filename <- sprintf("n-%s_m-%s_k-%s_sel-%s_noisy-%s_depth-NA_rep-%s_seed-%s", n, m, k, selection, noise, rep, seed)
              filepath <- file.path(root_outdir, filename)
              dir.create(filepath, recursive = TRUE, showWarnings = FALSE)
              saveRDS(instance, file = file.path(filepath, sprintf("%s.RDS", filename)))
              write.matrix(instance[["F"]], file = file.path(filepath, "F.txt"))
              write.matrix(instance[["B"]], file = file.path(filepath, "B.txt"))
            }
          }
        }
      }
    }
  }
}
# Generate and save one instance per combination of the parameter grid below
# (second run of the same grid in this history).
source("/Users/maitena/Research/clonalDeconvolution/my_algorithm/code/data/generate_instances.R")
n_vals <- c(10, 25, 50, 100)
k_vals <- c(0.4, 1, 5, 10)
selection_vals <- c("positive", "neutral")
m_vals <- c(2, 4, 6, 10)
noise_vals <- c(FALSE, TRUE)
depth_vals <- c(30, 100, 10000)
reps <- 1:10 # 5 repetitions for tuning, 10 for evaluation
root_outdir <- "/Users/maitena/Research/clonalDeconvolution/my_algorithm/data/second_experimentation/evaluation" # change here for tuning and eval
seed <- 0 # placeholder; overwritten on every iteration below
# .Options
for (n in n_vals) {
  for (k in k_vals) {
    for (selection in selection_vals) {
      for (m in m_vals) {
        for (noise in noise_vals) {
          for (rep in reps) {
            # The seed depends on n, k, rep and selection only (not on m,
            # noise or depth), so it is computed once per repetition.
            # Scaling factor: 100 for tuning, 200 for evaluation.
            seed <- n*(k+1) + rep + 200*as.numeric(factor(selection, levels = c("positive", "neutral")))
            if (noise) {
              # Noisy instances: one per sequencing depth.
              for (depth in depth_vals) {
                instance <- create_instance(n = n, m = m, k = k, selection = selection, noisy = noise, depth = depth, seed = seed)
                filename <- sprintf("n-%s_m-%s_k-%s_sel-%s_noisy-%s_depth-%s_rep-%s_seed-%s", n, m, k, selection, noise, depth, rep, seed)
                filepath <- file.path(root_outdir, filename)
                # recursive: also create root_outdir if missing;
                # showWarnings = FALSE: stay quiet when rerun over existing dirs.
                dir.create(filepath, recursive = TRUE, showWarnings = FALSE)
                saveRDS(instance, file = file.path(filepath, sprintf("%s.RDS", filename)))
                write.matrix(instance[["F"]], file = file.path(filepath, "F.txt"))
                write.matrix(instance[["B"]], file = file.path(filepath, "B.txt"))
              }
            } else {
              # Noise-free instance: no depth argument is passed, and the
              # file name records the depth as NA.
              instance <- create_instance(n = n, m = m, k = k, selection = selection, noisy = noise, seed = seed)
              filename <- sprintf("n-%s_m-%s_k-%s_sel-%s_noisy-%s_depth-NA_rep-%s_seed-%s", n, m, k, selection, noise, rep, seed)
              filepath <- file.path(root_outdir, filename)
              dir.create(filepath, recursive = TRUE, showWarnings = FALSE)
              saveRDS(instance, file = file.path(filepath, sprintf("%s.RDS", filename)))
              write.matrix(instance[["F"]], file = file.path(filepath, "F.txt"))
              write.matrix(instance[["B"]], file = file.path(filepath, "B.txt"))
            }
          }
        }
      }
    }
  }
}
# Scratch arithmetic typed at the console; results are printed, not stored.
# `**` is R's alternative spelling of `^` and binds tighter than `*`, so
# 4**1 is 4 and 2**2 is 4 in the expressions below.
# NOTE(review): what these figures stand for is not shown in this file.
17000*91/100000
18000*91/100000
51200*2
72*102400
4*4**1*4*2*3*2
4*4**1*4*2*3*2+(4*4*1*4*2**2)
4*4**1*4*2*3*2+(4*4*1*4*2**2)*100
102400/1024
100*4
400/24
46*1024
# Root folder holding one result directory per sample.
datadir <- "/Users/maitena/Research/FFPE/manuscript/code/prueba/results"

# Pass 1: every directory under datadir, named by its last path component.
sample_list <- list.dirs(datadir, full.names = TRUE)
sample_list <- setNames(sample_list, basename(sample_list))
sample_list

# Pass 2: same listing, keeping only paths that contain "sample" and end in
# a digit.
sample_list <- list.dirs(datadir, full.names = TRUE)
sample_list <- setNames(sample_list, basename(sample_list))
sample_list <- sample_list[grep("sample.*[0-9]$", sample_list)]
sample_list

# Pass 3: identical to pass 2, followed by the train/test subset.
sample_list <- list.dirs(datadir, full.names = TRUE)
sample_list <- setNames(sample_list, basename(sample_list))
sample_list <- sample_list[grep("sample.*[0-9]$", sample_list)]
sample_list
# Directories whose path matches "train.*test".
test_list <- sample_list[grep("train.*test", sample_list)]
test_list
# Load the AUC tables (resubstitution and sample-out test) for every sample.
# The tidyverse must be attached before the first `%>%` pipeline is run.
library(tidyverse)

# Sample directories: paths under datadir containing "sample" and ending in a
# digit, named by their last path component. The train/test subset is the
# directories whose path matches "train.*test".
sample_list <- list.dirs(datadir, full.names = TRUE)
names(sample_list) <- basename(sample_list)
sample_list <- sample_list[grepl("sample.*[0-9]$", sample_list)]
test_list <- sample_list[grepl("train.*test", sample_list)]
test_list

# Read the "auc" element of every `*<tag>*RDS` file found in `dirs` and return
# a tibble with columns `sample`, `model` and `ROC`.
#
# dirs: character vector of directories to search (not recursive).
# tag:  error-type tag embedded in the file names, e.g. "resubst-error".
#
# NOTE(review): `pattern` is a regular expression, not a glob; the strings
# are kept exactly as they were used interactively.
# NOTE(review): assumes neither the directory name nor the model name
# contains "-", since that character separates sample from model below.
read_auc_table <- function(dirs, tag) {
  list.files(path = dirs, pattern = sprintf("*.%s.*.RDS", tag), full.names = TRUE) %>%
    # Name each file "<directory name>-<file name>".
    set_names(sprintf("%s-%s", basename(dirname(.)), basename(.))) %>%
    map(readRDS) %>%
    # Pull the "auc" element of each object as character, then make it numeric.
    map_chr("auc") %>%
    enframe() %>%
    mutate(value = as.numeric(value)) %>%
    # Drop everything from "_<tag>" onwards, leaving "<directory>-<model>".
    mutate(name = gsub(sprintf("_%s.*", tag), "", name)) %>%
    separate(name, into = c("sample", "model"), sep = "-") %>%
    # Strip the last underscore-delimited suffix of the directory name.
    mutate(sample = gsub("_[^_]+$", "", sample)) %>%
    rename(ROC = value)
}

# Test-set AUCs exist only for the train/test directories.
test_data <- read_auc_table(test_list, "test-error")
test_data
# Resubstitution AUCs are read from every sample directory.
resubst_data <- read_auc_table(sample_list, "resubst-error")
resubst_data
# Cross-validation results of the non-RF models, one RDS file per model.
# The model names must be a parenthesised alternation: written inside square
# brackets they form a character class that matches any single one of those
# letters (or "|") before "_model_", which only selected the right files by
# coincidence of each model name's last letter.
# NOTE(review): each RDS is presumably a data frame of per-fold results
# (Resample / ROC columns) -- confirm against the code that writes them.
kcv_data_nonRF <- list.files(
  path = sample_list,
  pattern = "(xgbTree|logReg|NB|NN)_model_[0-9].*.RDS",
  full.names = TRUE
) %>%
  # Name each file "<directory name>-<file name>"; map_dfr stores it in `name`.
  set_names(sprintf("%s-%s", basename(dirname(.)), basename(.))) %>%
  map_dfr(readRDS, .id = "name") %>%
  # Drop everything from "_model" onwards, leaving "<directory>-<model>".
  mutate(name = gsub("_model.*", "", name)) %>%
  separate(name, into = c("sample", "model"), sep = "-") %>%
  # Strip the last underscore-delimited suffix of the directory name.
  mutate(sample = gsub("_[^_]+$", "", sample))
kcv_data_nonRF
# Cross-validation AUCs of the RF models, reshaped to one row per fold, then
# combined with the non-RF results. The pipeline is run once and stored, so
# every model file is read a single time.
kcv_data_RF <- list.files(path = sample_list, pattern = "RF_model_[0-9].*.RDS", full.names = TRUE) %>%
  # Name each file "<directory name>-<file name>"; map_dfr stores it in `name`.
  set_names(sprintf("%s-%s", basename(dirname(.)), basename(.))) %>%
  map_dfr(function(x) {
    data <- readRDS(x)
    # "auc" row of the cross-validation summary, without its first two columns.
    # NOTE(review): those two are presumably the mean and sd columns -- confirm.
    data@model$cross_validation_metrics_summary["auc",][-(1:2)]
  }, .id = "name") %>%
  # Long format, one row per fold. gather() stacks column by column; the
  # explicit range assumes exactly ten folds.
  gather(Resample, ROC, cv_1_valid:cv_10_valid) %>%
  mutate(ROC = as.numeric(ROC)) %>%
  # "cv_<i>_valid" -> "Fold<ii>" (zero-padded to two digits).
  mutate(Resample = sprintf("Fold%02d", as.integer(sub("^cv_([0-9]+)_valid$", "\\1", Resample)))) %>%
  # Drop everything from "_model" onwards, leaving "<directory>-<model>".
  mutate(name = gsub("_model.*", "", name)) %>%
  separate(name, into = c("sample", "model"), sep = "-") %>%
  # Strip the last underscore-delimited suffix of the directory name.
  mutate(sample = gsub("_[^_]+$", "", sample))
kcv_data <- bind_rows(kcv_data_nonRF, kcv_data_RF)
kcv_data
# Stack the three AUC sources into one table with an `origin` column.
# Naming the inputs makes bind_rows() write the labels directly, instead of
# producing "1"/"2" and translating them afterwards.
all_data <- bind_rows(
  resubstitution = resubst_data,
  kcv = select(kcv_data, ROC, sample, model),
  .id = "origin"
)
test_data <- test_data %>%
  mutate(origin = "sample-out")
all_data <- bind_rows(all_data, test_data)
all_data

# Relabel xgbTree as XGBoost and order the `sample` factor levels by the
# number left after removing "sample" from the sample name. Running this step
# again leaves the table unchanged, so it is applied once.
all_data <- all_data %>%
  mutate(idx = as.numeric(gsub("sample", "", sample))) %>%
  mutate(model = ifelse(model == "xgbTree", "XGBoost", model)) %>%
  arrange(idx) %>%
  mutate(sample = factor(sample, levels = unique(sample))) %>%
  select(-idx)
all_data
# Switch to the colon/liver analysis folder; the relative file names below
# are resolved against it.
getwd()
setwd("/Users/maitena/Research/FFPE/manuscript/code/colon_liver")
library(tidyverse)
library(reshape2)
library(ggsci)
library(ggpubr)
## Some common data ------------------------------
# Change all the filepaths accordingly
Bonnet_samplenames_filename <- "maxwell_FFPE_FF_tumor_pairs.csv"
# FFPE/FF pairs with a sample label built from the tissue plus a 1, 2, 1, 2
# counter; the two trailing CSV columns and `tissue` itself are discarded.
# NOTE(review): the counter has length 4, so the CSV is presumably expected
# to hold exactly four rows -- confirm.
Bonnet_samplenames <- read_csv(
  Bonnet_samplenames_filename,
  col_names = c("FFPE", "FF", "tissue", "remove1", "remove2")
) %>%
  mutate(sample = paste0(tissue, rep(1:2, times = 2))) %>%
  select(FFPE, FF, sample)
Bonnet_samplenames
# Raw view of the same CSV, with all five columns.
read_csv(Bonnet_samplenames_filename, col_names = c("FFPE", "FF", "tissue", "remove1", "remove2"))
getwd()
# Print the run accession list, one element per line of the file.
readLines("run_accessions.txt")
readLines("run_accessions.txt")
