Below is reported the experimental analysis to evaluate the performance of reduction intensity methods applied to blade and bladelet cores. All steps of data analysis and visualization are reported to allow optimal reproducibility.
library(Hmisc)
library(readr)
library(readxl)
library(officer)
library(plyr)
library(dplyr)
library(ggplot2)
library(reshape2)
library(tidyr)
library(ggplot2)
library(BlandAltmanLeh)
library(gridExtra)
library(blandr)
experimental_flakes <- readxl::read_xlsx("experimental_flakes.xlsx")
experimental_cores <- readxl::read_xlsx("experimental_cores.xlsx")
# corrdimslength takes gen (generation) and median as inputs and returns the corrected length of the core by multiplying gen and median
corrdimslength <- function(gen, median) {
gen * median
}
# corrdimslength2 takes gen1, gen2, and dim as inputs and returns the corrected length of the core by adding gen1, gen2, and dim.
corrdimslength2 <- function(gen1, gen2, dim) {
gen1 + gen2 + dim
}
# corrdims takes gen, median, and dim as inputs and returns the corrected dimensions of the core by multiplying gen and median and adding dim.
corrdims <- function(gen, median, dim) {
gen * median + dim
}
experimental_flakes %>%
filter(plat_cor == "1") %>%
summarise(mean_thickness = mean(Max_thickness),
median_thickness = median(Max_thickness),
shapiro_test = shapiro.test(Max_thickness)$p.value) %>%
{median_platform <<- median(.$median_thickness)}
experimental_flakes %>%
filter(dim_cor == "2") %>%
summarise(mean_thickness = mean(Max_thickness),
median_thickness = median(Max_thickness),
shapiro_test = shapiro.test(Max_thickness)$p.value) %>%
{median_base <<- median(.$median_thickness)}
experimental_cores<-experimental_cores %>%
mutate(Volume = Volume/1000) %>%
filter(Original == "No")
experimental_cores <- experimental_cores %>%
mutate(corrected_length_platform = corrdimslength(Gen_Length_plat, median_platform),
corrected_length_base = corrdimslength(Gen_Length_base, median_base),
corrected_length = corrdimslength2(corrected_length_platform, corrected_length_base, Max_Length),
corrected_width = corrdims(Gen_Width, median_base, Max_Width),
corrected_thickness = corrdims(Gen_Thickness, median_base, Max_Thickness))
experimental_cores
## # A tibble: 42 × 33
## Core_ID Origi…¹ Volume Surface Weight Max_L…² Max_W…³ Max_T…⁴ Gen_L…⁵ Gen_L…⁶
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1.1 No 176. 20206. 453. 100. 48.8 84.5 0 0
## 2 1.2 No 142. 16850. 365. 92.6 48.5 74.2 0 0
## 3 1.3 No 106. 14324. 271. 85.4 48.2 60.6 0 0
## 4 1.4 No 85.8 11982. 220. 74.8 48.2 57.2 0 0
## 5 1.5 No 72.7 10683. 187. 72.2 46.6 56.3 0 0
## 6 1.6 No 51.0 8664. 131. 64.7 44.7 55.9 0 0
## 7 1.7 No 35.4 6797. 91.1 59.6 38.3 53.3 1 0
## 8 1.8 No 24.5 5296. 63.0 50.9 28.9 42.5 2 0
## 9 1.9 No 14.6 3817. 37.5 45.6 21.0 38.0 2 0
## 10 2.1 No 115. 14685. 297. 54 45.3 67.8 0 0
## # … with 32 more rows, 23 more variables: Gen_Width <dbl>, Gen_Thickness <dbl>,
## # Original_Volume <dbl>, ...14 <dbl>, Original_surface <dbl>,
## # Scars_number_flaking_surface <dbl>, Scars_number_platform <dbl>,
## # Scars_number_all <dbl>, `Hinges_number_flaking_surface_>5mm` <dbl>,
## # `Hinges_number_platform_>5mm` <dbl>, `Hinges_number_all_>5mm` <dbl>,
## # Platform_number <dbl>, Number_of_rotations <dbl>,
## # Faces_exploited_number <dbl>, Cortical_surface <dbl>, …
geometries <- experimental_cores %>%
transmute(
Experimental_data = Original_Volume / 1000,
Prism = corrected_width * corrected_thickness * corrected_length / 1000,
Cube = corrected_length^3 / 1000,
Sphere = 4/3 * pi * ((corrected_length/2)^3) / 1000,
Cylinder = pi * ((corrected_width/2)^2) * corrected_length / 1000,
Ellipsoid = 4/3 * pi * ((corrected_length/2) * (corrected_width/2) * (corrected_thickness/2)) / 1000
)
geometries
## # A tibble: 42 × 6
## Experimental_data Prism Cube Sphere Cylinder Ellipsoid
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 235. 453. 1014. 531. 188. 237.
## 2 235. 369. 794. 416. 171. 193.
## 3 235. 349. 622. 326. 212. 182.
## 4 235. 311. 418. 219. 213. 163.
## 5 235. 309. 377. 197. 222. 162.
## 6 235. 249. 271. 142. 164. 130.
## 7 235. 222. 257. 135. 127. 116.
## 8 235. 170. 204. 107. 77.3 88.8
## 9 235. 110. 154. 80.8 46.0 57.5
## 10 161. 238. 157. 82.4 159. 125.
## # … with 32 more rows
geometries_summary <- data.frame(
length = round(sapply(geometries, length), 2),
mean = round(sapply(geometries, mean), 2),
median = round(sapply(geometries, median), 2),
min= round(sapply(geometries, min), 2),
max= round(sapply(geometries, max), 2),
sd = round(sapply(geometries, sd), 2),
cv = round(sapply(geometries, function(x) sd(x) / mean(x)), 2),
shapiro_wilk = round(sapply(geometries, function(x) shapiro.test(x)$p.value), 7)
)
geometries_summary
## length mean median min max sd cv shapiro_wilk
## Experimental_data 42 189.41 183.01 38.77 425.62 96.91 0.51 0.0094339
## Prism 42 274.49 224.12 70.20 1032.43 193.02 0.70 0.0000121
## Cube 42 297.24 259.64 46.46 1109.04 245.15 0.82 0.0000281
## Sphere 42 155.63 135.95 24.33 580.69 128.36 0.82 0.0000281
## Cylinder 42 208.41 161.17 46.02 1343.43 211.27 1.01 0.0000000
## Ellipsoid 42 143.72 117.35 36.76 540.58 101.07 0.70 0.0000121
Perc_remaining_volume <- (experimental_cores$Volume / geometries * 100)
Perc_extracted_volume <- 100 - Perc_remaining_volume
Perc_extracted_volume_summary <- data.frame(
length = round(sapply(Perc_extracted_volume, length), 2),
mean = round(sapply(Perc_extracted_volume, mean), 2),
median = round(sapply(Perc_extracted_volume, median), 2),
min= round(sapply(Perc_extracted_volume, min), 2),
max= round(sapply(Perc_extracted_volume, max), 2),
sd = round(sapply(Perc_extracted_volume, sd), 2),
cv = round(sapply(Perc_extracted_volume, function(x) sd(x) / mean(x)), 2),
shapiro_wilk = round(sapply(Perc_extracted_volume, function(x) shapiro.test(x)$p.value), 6)
)
Perc_extracted_volume_summary
## length mean median min max sd cv shapiro_wilk
## Experimental_data 42 61.25 65.26 21.05 95.11 22.44 0.37 0.023203
## Prism 42 76.18 76.99 51.43 87.80 7.90 0.10 0.020038
## Cube 42 69.80 76.99 -16.90 93.61 24.20 0.35 0.000001
## Sphere 42 42.33 56.06 -123.26 87.80 46.22 1.09 0.000001
## Cylinder 42 64.41 70.27 6.05 87.78 19.23 0.30 0.000094
## Ellipsoid 42 54.51 56.06 7.24 76.69 15.10 0.28 0.020038
correlations_pearson <- rcorr(as.matrix(geometries), type = "pearson")
correlations_spearman <- rcorr(as.matrix(geometries), type = "spearman")
experimental_cores <- experimental_cores %>%
mutate(SDI = Scars_number_all / Surface,
perc_remaining_volume = (Volume / Original_Volume) * 100,
perc_extracted_volume = 100 - perc_remaining_volume,
no_cortical_surface = Surface - Cortical_surface,
perc_nco = 100 - ((no_cortical_surface / Original_surface) * 100),
logSDI = log(SDI))
individual_error <- geometries[-1] - geometries$Experimental_data
AE <- apply(individual_error, 2, mean)
Perc_AE <- (AE / mean(geometries$Experimental_data)) * 100
MAE <- apply(abs(individual_error), 2, mean)
Perc_MAE <- (MAE / mean(geometries$Experimental_data)) * 100
RMSE <- sqrt(apply(individual_error^2, 2, mean))
Perc_RMSE <- (RMSE / mean(geometries$Experimental_data)) * 100
Accuracy_table <- data.frame(round(AE, 2), round(Perc_AE, 2), round(MAE, 2), round(Perc_MAE, 2), round(RMSE, 2), round(Perc_RMSE, 2))
Accuracy_table
## round.AE..2. round.Perc_AE..2. round.MAE..2. round.Perc_MAE..2.
## Prism 85.08 44.92 120.43 63.58
## Cube 107.83 56.93 169.95 89.72
## Sphere -33.78 -17.83 96.06 50.72
## Cylinder 19.00 10.03 83.72 44.20
## Ellipsoid -45.69 -24.12 68.27 36.04
## round.RMSE..2. round.Perc_RMSE..2.
## Prism 173.61 91.66
## Cube 244.23 128.95
## Sphere 126.56 66.82
## Cylinder 168.40 88.91
## Ellipsoid 96.82 51.12
individual_error_2 <- Perc_extracted_volume[-1] - Perc_extracted_volume$Experimental_data
AE_2 <- apply(individual_error_2, 2, mean)
Perc_AE_2 <- (AE_2 / mean(Perc_extracted_volume$Experimental_data)) * 100
MAE_2 <- apply(abs(individual_error_2), 2, mean)
Perc_MAE_2 <- (MAE_2 / mean(Perc_extracted_volume$Experimental_data)) * 100
RMSE_2 <- sqrt(apply(individual_error_2^2, 2, mean))
Perc_RMSE_2 <- (RMSE_2 / mean(Perc_extracted_volume$Experimental_data)) * 100
Accuracy_table_percentage <- data.frame(round(AE_2, 2), round(Perc_AE_2, 2), round(MAE_2, 2), round(Perc_MAE_2, 2), round(RMSE_2, 2), round(Perc_RMSE_2, 2))
Accuracy_table_percentage
## round.AE_2..2. round.Perc_AE_2..2. round.MAE_2..2.
## Prism 14.94 24.38 16.80
## Cube 8.56 13.97 21.52
## Sphere -18.92 -30.89 30.93
## Cylinder 3.17 5.17 12.19
## Ellipsoid -6.73 -11.00 13.20
## round.Perc_MAE_2..2. round.RMSE_2..2. round.Perc_RMSE_2..2.
## Prism 27.44 22.22 36.27
## Cube 35.14 29.22 47.71
## Sphere 50.50 48.94 79.91
## Cylinder 19.90 16.39 26.76
## Ellipsoid 21.54 14.86 24.26
# Crea un vector con los nombres de las variables de interés
variables_interes <- c("Scars_number_all", "SDI", "logSDI", "perc_nco",
"Number_of_Exploitation_Surface_Convergences",
"Number_of_Exploitation_Surfaces", "Number_of_rotations",
"Platform_number", "Average_Platform_angle")
# Crea una matriz para almacenar los valores de las correlaciones y las pruebas
tabla_cor <- matrix(NA, nrow = length(variables_interes), ncol = 5)
colnames(tabla_cor) <- c("Pearson R", "Coef_det", "P value", "Spearman rho", "P value")
rownames(tabla_cor) <- variables_interes
# Rellena la matriz con los valores de correlación y pruebas
for (i in 1:length(variables_interes)) {
for (j in 1:5) {
if (j == 1) {
# Pearson R
tabla_cor[i, j] <- cor(experimental_cores[[variables_interes[i]]], experimental_cores$perc_extracted_volume, use = "pairwise.complete.obs")
} else if (j == 2) {
# Coef_det
modelo <- lm(experimental_cores$perc_extracted_volume ~ experimental_cores[[variables_interes[i]]], data = experimental_cores)
tabla_cor[i, j] <- summary(modelo)$r.squared
} else if (j == 3) {
# P value Pearson
tabla_cor[i, j] <- cor.test(experimental_cores[[variables_interes[i]]], experimental_cores$perc_extracted_volume)$p.value
} else if (j == 4) {
# Spearman rho
tabla_cor[i, j] <- cor(experimental_cores[[variables_interes[i]]], experimental_cores$perc_extracted_volume, method = "spearman", use = "pairwise.complete.obs")
} else {
# P value Spearman
tabla_cor[i, j] <- cor.test(experimental_cores[[variables_interes[i]]], experimental_cores$perc_extracted_volume, method = "spearman")$p.value
}
}
}
tabla_cor
## Pearson R Coef_det
## Scars_number_all 0.51524526 0.265477679
## SDI 0.70299963 0.494208484
## logSDI 0.78047909 0.609147606
## perc_nco 0.78908514 0.622655365
## Number_of_Exploitation_Surface_Convergences 0.42491774 0.180555084
## Number_of_Exploitation_Surfaces 0.48691958 0.237090677
## Number_of_rotations 0.08659588 0.007498847
## Platform_number -0.22269811 0.049594449
## Average_Platform_angle -0.15854211 0.025135602
## P value Spearman rho
## Scars_number_all 4.799323e-04 0.50919124
## SDI 2.092381e-07 0.76825217
## logSDI 1.095671e-09 0.76825217
## perc_nco 5.367478e-10 0.84328661
## Number_of_Exploitation_Surface_Convergences 5.032171e-03 0.46262658
## Number_of_Exploitation_Surfaces 1.075278e-03 0.48133356
## Number_of_rotations 5.855505e-01 0.07379203
## Platform_number 1.563173e-01 -0.21306362
## Average_Platform_angle 3.159460e-01 -0.13891755
## P value
## Scars_number_all 5.736159e-04
## SDI 5.847553e-08
## logSDI 5.847553e-08
## perc_nco 0.000000e+00
## Number_of_Exploitation_Surface_Convergences 2.037004e-03
## Number_of_Exploitation_Surfaces 1.250628e-03
## Number_of_rotations 6.423405e-01
## Platform_number 1.754958e-01
## Average_Platform_angle 3.802780e-01
geometries2<- geometries %>% gather(key = Geometry,
value = Value,
Experimental_data, Prism, Cube, Sphere, Cylinder,Ellipsoid)%>%
mutate(Geometry = factor(Geometry, levels = c("Prism", "Cube", "Sphere", "Cylinder", "Ellipsoid", "Experimental_data")))
geometries_reduction<- Perc_extracted_volume %>% gather(key = Geometry,
value = Value,
Experimental_data, Prism, Cube, Sphere, Cylinder,Ellipsoid)
geometries_reduction_error<-Perc_extracted_volume[-1] - Perc_extracted_volume$Experimental_data
geometries_reduction_error<- geometries_reduction_error %>% gather(key = Geometry,
value = Value,
Prism, Cube, Sphere, Cylinder,Ellipsoid)
ggplot(geometries2, aes(x = Geometry, y = Value, fill=Geometry)) +
scale_fill_brewer(palette = "PRGn") +
scale_color_brewer("Geometry",palette="PRGn",drop=TRUE) +
ggdist::stat_halfeye(
adjust = .5,
width = .6,
.width = 0,
justification = 0) +
geom_boxplot(
width = .15,
outlier.shape = NA
) +
## add justified jitter from the {gghalves} package
gghalves::geom_half_point(
aes( color = Geometry),side = "l")+
coord_flip(xlim = c(1.2, NA), clip = "off") +
theme_classic() +
theme(
legend.position = "none",
legend.text = element_text(size = 12),
axis.text = element_text(size = 12),
axis.title = element_text(size = 14)
)+
labs(x= "", y = "Estimated Volume (cm3)") +
scale_y_continuous(breaks = seq(from= 0, to=1100, by=100), limits =c(0,1100))
Figure 3. Boxplot, jitter plot and histogram showing a comparison of the estimated original volumes (EOV) (cm3) obtained from different geometric formulas.
geometries_reduction$Geometry <- factor(geometries_reduction$Geometry,
levels = c("Ellipsoid", "Cylinder", "Sphere", "Prism", "Cube","Experimental_data"))
geometries_reduction_error$Geometry <- factor(geometries_reduction_error$Geometry,
levels = c("Experimental_data", "Ellipsoid", "Cylinder", "Sphere", "Prism", "Cube"))
real_ellipsoid<- geometries_reduction %>% filter (Geometry =="Experimental_data" | Geometry =="Ellipsoid")
real_cylinder<- geometries_reduction %>% filter (Geometry =="Experimental_data" | Geometry =="Cylinder")
real_sphere<- geometries_reduction %>% filter (Geometry =="Experimental_data" | Geometry =="Sphere")
real_cube<- geometries_reduction %>% filter (Geometry =="Experimental_data" | Geometry =="Cube")
real_prism<- geometries_reduction %>% filter (Geometry =="Experimental_data" | Geometry =="Prism")
figure2a<-ggplot(real_ellipsoid, aes(x=Value, color=Geometry, fill= Geometry)) +
geom_density(alpha=.5)+ theme_classic() +xlim(0,100)+ylim(0,.05)+ theme(legend.position="bottom")+
theme (text = element_text(size=14))+ scale_fill_manual(values=c("#7fbf7bff", "#1b7837ff"))+
scale_color_manual(values=c("#7fbf7bff", "#1b7837ff"))+
labs(x= "Percentage of Extracted Volume", y = "", color="", fill = "")
figure2b<-ggplot(real_cylinder, aes(x=Value, color=Geometry, fill= Geometry)) +
geom_density(alpha=.5)+ theme_classic() +xlim(0,100)+ ylim(0,.05)+ theme(legend.position="bottom")+
theme (text = element_text(size=14))+ scale_fill_manual(values=c("#d9f0d3ff", "#1b7837ff"))+
scale_color_manual(values=c("#d9f0d3ff", "#1b7837ff"))+
labs(x= "Percentage of Extracted Volume", y = "", color="", fill = "")
figure2c<-ggplot(real_sphere, aes(x=Value, color=Geometry, fill= Geometry)) +
geom_density(alpha=.5)+ theme_classic() +xlim(0,100)+ ylim(0,.05)+theme(legend.position="bottom")+
theme (text = element_text(size=14))+ scale_fill_manual(values=c("#af8dc3ff", "#1b7837ff"))+
scale_color_manual(values=c("#af8dc3ff", "#1b7837ff"))+
labs(x= "Percentage of Extracted Volume", y = "", color="", fill = "")
figure2d<-ggplot(real_prism, aes(x=Value, color=Geometry, fill= Geometry)) +
geom_density(alpha=.5)+ theme_classic() +xlim(0,100)+ ylim(0,.05)+theme(legend.position="bottom")+
theme (text = element_text(size=14))+ scale_fill_manual(values=c("#e7d4e8ff", "#1b7837ff"))+
scale_color_manual(values=c("#e7d4e8ff", "#1b7837ff"))+
labs(x= "Percentage of Extracted Volume", y = "", color="", fill = "")
figure2e<-ggplot(real_cube, aes(x=Value, color=Geometry, fill= Geometry)) +
geom_density(alpha=.5)+ theme_classic() +xlim(0,100)+ ylim(0,.05)+ theme(legend.position="bottom")+
theme (text = element_text(size=14))+ scale_fill_manual(values=c("#762a83ff","#1b7837ff"))+
scale_color_manual(values=c("#762a83ff", "#1b7837ff"))+
labs(x= "Percentage of Extracted Volume", y = "", color="", fill = "")
figure2f <- ggplot(geometries_reduction_error, aes(Geometry, Value)) +
geom_jitter(aes(colour = factor(Geometry)), size = 4, show.legend = FALSE) +
ylim(-200,100) +
theme_classic() +
scale_color_manual(values=c("#7fbf7bff","#d9f0d3ff","#e7d4e8ff","#af8dc3ff","#762a83ff")) +
geom_hline(yintercept = 0, linetype = "dashed")
library(ggpubr)
ggarrange(figure2a,figure2b, figure2c, figure2d,figure2e,figure2f, labels = c("A", "B", "C", "D","E","F"), nrow=2, ncol=3, common.legend = FALSE)
Figure 4. A-E) Comparisons of Percentage of Extracted Volume distributions between each geometric formula and real data. F) Jitter plot illustrating individual errors, which represent the differences between individual real data points and its estimation based on each geometric formula.
# crear el gráfico para la figura A
Figure5a <- blandr.draw(Perc_extracted_volume$Ellipsoid, Perc_extracted_volume$Experimental_data) +
theme_classic() +
theme(plot.title = element_blank()) + # eliminar el título del gráfico
scale_x_continuous(breaks = seq(from = 0, to = 100, by = 5), limits = c(0, 100)) +
scale_y_continuous(breaks = seq(from = -45, to = 45, by = 5), limits = c(-45, 45))
# modificar el objeto ggplot para cambiar el color del relleno
p_build <- ggplot_build(Figure5a)
p_build$data[[12]][["fill"]] <- "#6078b0ff"
p_build$data[[13]][["fill"]] <- "#c7dd0dff"
p_build$data[[14]][["fill"]] <- "#c7dd0dff"
# guardar el objeto ggplot modificado en una nueva variable
Figure5aa <- ggplot_gtable(p_build)
# crear el gráfico para la figura B
Figure5b <- blandr.draw(Perc_extracted_volume$Cylinder, Perc_extracted_volume$Experimental_data) +
theme_classic() +
theme(plot.title = element_blank()) + # eliminar el título del gráfico
scale_x_continuous(breaks = seq(from = 0, to = 100, by = 5), limits = c(0, 100)) +
scale_y_continuous(breaks = seq(from = -45, to = 45, by = 5), limits = c(-45, 45))
# modificar el objeto ggplot para cambiar el color del relleno
p_build <- ggplot_build(Figure5b)
p_build$data[[12]][["fill"]] <- "#6078b0ff"
p_build$data[[13]][["fill"]] <- "#c7dd0dff"
p_build$data[[14]][["fill"]] <- "#c7dd0dff"
# guardar el objeto ggplot modificado en una nueva variable
Figure5bb <- ggplot_gtable(p_build)
# combinar las dos figuras en una sola
grid.arrange(Figure5aa, Figure5bb, ncol = 2, widths = c(1, 1))
Figure 5. Visualization of the Bland-Altman test for the a) ellipsoid, and b) the cylinder formulas.