Part 1: Study of the experimantal assemblage

Below is reported the experimental analysis to evaluate the performance of reduction intensity methods applied to blade and bladelet cores. All steps of data analysis and visualization are reported to allow optimal reproducibility.


List of used packages

library(Hmisc)
library(readr)
library(readxl)
library(officer)
library(plyr)
library(dplyr)
library(ggplot2)
library(reshape2)
library(tidyr)
library(ggplot2)
library(BlandAltmanLeh)
library(gridExtra)
library(blandr)


Loading datasets

experimental_flakes <- readxl::read_xlsx("experimental_flakes.xlsx")
experimental_cores <- readxl::read_xlsx("experimental_cores.xlsx")


Functions to correct core’s dimensions

# corrdimslength takes gen (generation) and median as inputs and returns the corrected length of the core by multiplying gen and median
corrdimslength <- function(gen, median) {
  gen * median
}

# corrdimslength2 takes gen1, gen2, and dim as inputs and returns the corrected length of the core by adding gen1, gen2, and dim.
corrdimslength2 <- function(gen1, gen2, dim) {
  gen1 + gen2 + dim
}

# corrdims takes gen, median, and dim as inputs and returns the corrected dimensions of the core by multiplying gen and median and adding dim.
corrdims <- function(gen, median, dim) {
  gen * median + dim
}


Subsetting the flakes and cores datasets

experimental_flakes %>%
  filter(plat_cor == "1") %>%
  summarise(mean_thickness = mean(Max_thickness),
            median_thickness = median(Max_thickness),
            shapiro_test = shapiro.test(Max_thickness)$p.value) %>%
  {median_platform <<- median(.$median_thickness)}

experimental_flakes %>%
  filter(dim_cor == "2") %>%
  summarise(mean_thickness = mean(Max_thickness),
            median_thickness = median(Max_thickness),
            shapiro_test = shapiro.test(Max_thickness)$p.value) %>%
  {median_base <<- median(.$median_thickness)}

experimental_cores<-experimental_cores %>%
  mutate(Volume = Volume/1000) %>%
  filter(Original == "No")
experimental_cores <- experimental_cores %>%
  mutate(corrected_length_platform = corrdimslength(Gen_Length_plat, median_platform),
         corrected_length_base = corrdimslength(Gen_Length_base, median_base),
         corrected_length = corrdimslength2(corrected_length_platform, corrected_length_base, Max_Length),
         corrected_width = corrdims(Gen_Width, median_base, Max_Width),
         corrected_thickness = corrdims(Gen_Thickness, median_base, Max_Thickness))
experimental_cores
## # A tibble: 42 × 33
##    Core_ID Origi…¹ Volume Surface Weight Max_L…² Max_W…³ Max_T…⁴ Gen_L…⁵ Gen_L…⁶
##    <chr>   <chr>    <dbl>   <dbl>  <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
##  1 1.1     No       176.   20206.  453.    100.     48.8    84.5       0       0
##  2 1.2     No       142.   16850.  365.     92.6    48.5    74.2       0       0
##  3 1.3     No       106.   14324.  271.     85.4    48.2    60.6       0       0
##  4 1.4     No        85.8  11982.  220.     74.8    48.2    57.2       0       0
##  5 1.5     No        72.7  10683.  187.     72.2    46.6    56.3       0       0
##  6 1.6     No        51.0   8664.  131.     64.7    44.7    55.9       0       0
##  7 1.7     No        35.4   6797.   91.1    59.6    38.3    53.3       1       0
##  8 1.8     No        24.5   5296.   63.0    50.9    28.9    42.5       2       0
##  9 1.9     No        14.6   3817.   37.5    45.6    21.0    38.0       2       0
## 10 2.1     No       115.   14685.  297.     54      45.3    67.8       0       0
## # … with 32 more rows, 23 more variables: Gen_Width <dbl>, Gen_Thickness <dbl>,
## #   Original_Volume <dbl>, ...14 <dbl>, Original_surface <dbl>,
## #   Scars_number_flaking_surface <dbl>, Scars_number_platform <dbl>,
## #   Scars_number_all <dbl>, `Hinges_number_flaking_surface_>5mm` <dbl>,
## #   `Hinges_number_platform_>5mm` <dbl>, `Hinges_number_all_>5mm` <dbl>,
## #   Platform_number <dbl>, Number_of_rotations <dbl>,
## #   Faces_exploited_number <dbl>, Cortical_surface <dbl>, …


Computing the different geoemtric formulas

geometries <- experimental_cores %>%
  transmute(
    Experimental_data = Original_Volume / 1000,
    Prism = corrected_width * corrected_thickness * corrected_length / 1000,
    Cube = corrected_length^3 / 1000,
    Sphere = 4/3 * pi * ((corrected_length/2)^3) / 1000,
    Cylinder = pi * ((corrected_width/2)^2) * corrected_length / 1000,
    Ellipsoid = 4/3 * pi * ((corrected_length/2) * (corrected_width/2) * (corrected_thickness/2)) / 1000
  )
geometries
## # A tibble: 42 × 6
##    Experimental_data Prism  Cube Sphere Cylinder Ellipsoid
##                <dbl> <dbl> <dbl>  <dbl>    <dbl>     <dbl>
##  1              235.  453. 1014.  531.     188.      237. 
##  2              235.  369.  794.  416.     171.      193. 
##  3              235.  349.  622.  326.     212.      182. 
##  4              235.  311.  418.  219.     213.      163. 
##  5              235.  309.  377.  197.     222.      162. 
##  6              235.  249.  271.  142.     164.      130. 
##  7              235.  222.  257.  135.     127.      116. 
##  8              235.  170.  204.  107.      77.3      88.8
##  9              235.  110.  154.   80.8     46.0      57.5
## 10              161.  238.  157.   82.4    159.      125. 
## # … with 32 more rows


Summarizing geoemtries

geometries_summary <- data.frame(
  length = round(sapply(geometries, length), 2),
  mean = round(sapply(geometries, mean), 2),
  median = round(sapply(geometries, median), 2),
  min= round(sapply(geometries, min), 2),
  max= round(sapply(geometries, max), 2),
  sd = round(sapply(geometries, sd), 2),
  cv = round(sapply(geometries, function(x) sd(x) / mean(x)), 2),
  shapiro_wilk = round(sapply(geometries, function(x) shapiro.test(x)$p.value), 7)
)
geometries_summary 
##                   length   mean median   min     max     sd   cv shapiro_wilk
## Experimental_data     42 189.41 183.01 38.77  425.62  96.91 0.51    0.0094339
## Prism                 42 274.49 224.12 70.20 1032.43 193.02 0.70    0.0000121
## Cube                  42 297.24 259.64 46.46 1109.04 245.15 0.82    0.0000281
## Sphere                42 155.63 135.95 24.33  580.69 128.36 0.82    0.0000281
## Cylinder              42 208.41 161.17 46.02 1343.43 211.27 1.01    0.0000000
## Ellipsoid             42 143.72 117.35 36.76  540.58 101.07 0.70    0.0000121


Calculating volume’s percentages

Perc_remaining_volume <- (experimental_cores$Volume / geometries * 100)
Perc_extracted_volume <- 100 - Perc_remaining_volume


Perc_extracted_volume_summary <- data.frame(
  length = round(sapply(Perc_extracted_volume, length), 2),
  mean = round(sapply(Perc_extracted_volume, mean), 2),
  median = round(sapply(Perc_extracted_volume, median), 2),
  min= round(sapply(Perc_extracted_volume, min), 2),
  max= round(sapply(Perc_extracted_volume, max), 2),
  sd = round(sapply(Perc_extracted_volume, sd), 2),
  cv = round(sapply(Perc_extracted_volume, function(x) sd(x) / mean(x)), 2),
  shapiro_wilk = round(sapply(Perc_extracted_volume, function(x) shapiro.test(x)$p.value), 6)
)

Perc_extracted_volume_summary
##                   length  mean median     min   max    sd   cv shapiro_wilk
## Experimental_data     42 61.25  65.26   21.05 95.11 22.44 0.37     0.023203
## Prism                 42 76.18  76.99   51.43 87.80  7.90 0.10     0.020038
## Cube                  42 69.80  76.99  -16.90 93.61 24.20 0.35     0.000001
## Sphere                42 42.33  56.06 -123.26 87.80 46.22 1.09     0.000001
## Cylinder              42 64.41  70.27    6.05 87.78 19.23 0.30     0.000094
## Ellipsoid             42 54.51  56.06    7.24 76.69 15.10 0.28     0.020038


Calculating Pearson’s and Spearman’s correlations

correlations_pearson <- rcorr(as.matrix(geometries), type = "pearson")
correlations_spearman <- rcorr(as.matrix(geometries), type = "spearman")

experimental_cores <- experimental_cores %>%
  mutate(SDI = Scars_number_all / Surface,
         perc_remaining_volume = (Volume / Original_Volume) * 100,
         perc_extracted_volume = 100 - perc_remaining_volume,
         no_cortical_surface = Surface - Cortical_surface,
         perc_nco = 100 - ((no_cortical_surface / Original_surface) * 100),
         logSDI = log(SDI))


Computing AE, %AE, MAE, %MAE, RMSE, and %RMSE

individual_error <- geometries[-1] - geometries$Experimental_data

AE <- apply(individual_error, 2, mean)
Perc_AE <- (AE / mean(geometries$Experimental_data)) * 100

MAE <- apply(abs(individual_error), 2, mean)
Perc_MAE <- (MAE / mean(geometries$Experimental_data)) * 100

RMSE <- sqrt(apply(individual_error^2, 2, mean))
Perc_RMSE <- (RMSE / mean(geometries$Experimental_data)) * 100

Accuracy_table <- data.frame(round(AE, 2), round(Perc_AE, 2), round(MAE, 2), round(Perc_MAE, 2), round(RMSE, 2), round(Perc_RMSE, 2))

Accuracy_table
##           round.AE..2. round.Perc_AE..2. round.MAE..2. round.Perc_MAE..2.
## Prism            85.08             44.92        120.43              63.58
## Cube            107.83             56.93        169.95              89.72
## Sphere          -33.78            -17.83         96.06              50.72
## Cylinder         19.00             10.03         83.72              44.20
## Ellipsoid       -45.69            -24.12         68.27              36.04
##           round.RMSE..2. round.Perc_RMSE..2.
## Prism             173.61               91.66
## Cube              244.23              128.95
## Sphere            126.56               66.82
## Cylinder          168.40               88.91
## Ellipsoid          96.82               51.12


Computing percentages of AE, %AE, MAE, %MAE, RMSE, and %RMSE

individual_error_2 <- Perc_extracted_volume[-1] - Perc_extracted_volume$Experimental_data

AE_2 <- apply(individual_error_2, 2, mean)
Perc_AE_2 <- (AE_2 / mean(Perc_extracted_volume$Experimental_data)) * 100

MAE_2 <- apply(abs(individual_error_2), 2, mean)
Perc_MAE_2 <- (MAE_2 / mean(Perc_extracted_volume$Experimental_data)) * 100

RMSE_2 <- sqrt(apply(individual_error_2^2, 2, mean))
Perc_RMSE_2 <- (RMSE_2 / mean(Perc_extracted_volume$Experimental_data)) * 100

Accuracy_table_percentage <- data.frame(round(AE_2, 2), round(Perc_AE_2, 2), round(MAE_2, 2), round(Perc_MAE_2, 2), round(RMSE_2, 2), round(Perc_RMSE_2, 2))

Accuracy_table_percentage
##           round.AE_2..2. round.Perc_AE_2..2. round.MAE_2..2.
## Prism              14.94               24.38           16.80
## Cube                8.56               13.97           21.52
## Sphere            -18.92              -30.89           30.93
## Cylinder            3.17                5.17           12.19
## Ellipsoid          -6.73              -11.00           13.20
##           round.Perc_MAE_2..2. round.RMSE_2..2. round.Perc_RMSE_2..2.
## Prism                    27.44            22.22                 36.27
## Cube                     35.14            29.22                 47.71
## Sphere                   50.50            48.94                 79.91
## Cylinder                 19.90            16.39                 26.76
## Ellipsoid                21.54            14.86                 24.26


Results of the correlation tests

# Crea un vector con los nombres de las variables de interés
variables_interes <- c("Scars_number_all", "SDI", "logSDI", "perc_nco",
                       "Number_of_Exploitation_Surface_Convergences", 
                       "Number_of_Exploitation_Surfaces", "Number_of_rotations", 
                       "Platform_number", "Average_Platform_angle")

# Crea una matriz para almacenar los valores de las correlaciones y las pruebas
tabla_cor <- matrix(NA, nrow = length(variables_interes), ncol = 5)
colnames(tabla_cor) <- c("Pearson R", "Coef_det", "P value", "Spearman rho", "P value")
rownames(tabla_cor) <- variables_interes

# Rellena la matriz con los valores de correlación y pruebas
for (i in 1:length(variables_interes)) {
  for (j in 1:5) {
    if (j == 1) {
      # Pearson R
      tabla_cor[i, j] <- cor(experimental_cores[[variables_interes[i]]], experimental_cores$perc_extracted_volume, use = "pairwise.complete.obs")
    } else if (j == 2) {
      # Coef_det
      modelo <- lm(experimental_cores$perc_extracted_volume ~ experimental_cores[[variables_interes[i]]], data = experimental_cores)
      tabla_cor[i, j] <- summary(modelo)$r.squared
    } else if (j == 3) {
      # P value Pearson
      tabla_cor[i, j] <- cor.test(experimental_cores[[variables_interes[i]]], experimental_cores$perc_extracted_volume)$p.value
    } else if (j == 4) {
      # Spearman rho
      tabla_cor[i, j] <- cor(experimental_cores[[variables_interes[i]]], experimental_cores$perc_extracted_volume, method = "spearman", use = "pairwise.complete.obs")
    } else {
      # P value Spearman
      tabla_cor[i, j] <- cor.test(experimental_cores[[variables_interes[i]]], experimental_cores$perc_extracted_volume, method = "spearman")$p.value
    }
  }
}

tabla_cor
##                                               Pearson R    Coef_det
## Scars_number_all                             0.51524526 0.265477679
## SDI                                          0.70299963 0.494208484
## logSDI                                       0.78047909 0.609147606
## perc_nco                                     0.78908514 0.622655365
## Number_of_Exploitation_Surface_Convergences  0.42491774 0.180555084
## Number_of_Exploitation_Surfaces              0.48691958 0.237090677
## Number_of_rotations                          0.08659588 0.007498847
## Platform_number                             -0.22269811 0.049594449
## Average_Platform_angle                      -0.15854211 0.025135602
##                                                  P value Spearman rho
## Scars_number_all                            4.799323e-04   0.50919124
## SDI                                         2.092381e-07   0.76825217
## logSDI                                      1.095671e-09   0.76825217
## perc_nco                                    5.367478e-10   0.84328661
## Number_of_Exploitation_Surface_Convergences 5.032171e-03   0.46262658
## Number_of_Exploitation_Surfaces             1.075278e-03   0.48133356
## Number_of_rotations                         5.855505e-01   0.07379203
## Platform_number                             1.563173e-01  -0.21306362
## Average_Platform_angle                      3.159460e-01  -0.13891755
##                                                  P value
## Scars_number_all                            5.736159e-04
## SDI                                         5.847553e-08
## logSDI                                      5.847553e-08
## perc_nco                                    0.000000e+00
## Number_of_Exploitation_Surface_Convergences 2.037004e-03
## Number_of_Exploitation_Surfaces             1.250628e-03
## Number_of_rotations                         6.423405e-01
## Platform_number                             1.754958e-01
## Average_Platform_angle                      3.802780e-01


Building Figure 3

geometries2<- geometries %>% gather(key = Geometry,
                                    value = Value,
                                    Experimental_data,  Prism, Cube, Sphere, Cylinder,Ellipsoid)%>% 
  mutate(Geometry = factor(Geometry, levels = c("Prism", "Cube", "Sphere", "Cylinder", "Ellipsoid", "Experimental_data")))


geometries_reduction<- Perc_extracted_volume %>% gather(key = Geometry,
                                                        value = Value,
                                                        Experimental_data,  Prism, Cube, Sphere, Cylinder,Ellipsoid)




geometries_reduction_error<-Perc_extracted_volume[-1] - Perc_extracted_volume$Experimental_data
geometries_reduction_error<- geometries_reduction_error %>% gather(key = Geometry,
                                                                   value = Value,
                                                                   Prism, Cube, Sphere, Cylinder,Ellipsoid)


ggplot(geometries2, aes(x = Geometry, y = Value, fill=Geometry)) + 
  scale_fill_brewer(palette = "PRGn") +
  scale_color_brewer("Geometry",palette="PRGn",drop=TRUE) +
  ggdist::stat_halfeye(
    adjust = .5, 
    width = .6, 
    .width = 0, 
    justification = 0) + 
  geom_boxplot(
    width = .15, 
    outlier.shape = NA
  ) +
  ## add justified jitter from the {gghalves} package
  gghalves::geom_half_point(
    aes( color = Geometry),side = "l")+ 
  coord_flip(xlim = c(1.2, NA), clip = "off") +
  theme_classic() +
  theme(
    legend.position = "none",
    legend.text = element_text(size = 12),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14)
  )+
  labs(x= "", y = "Estimated Volume (cm3)") +
  scale_y_continuous(breaks = seq(from= 0, to=1100, by=100), limits =c(0,1100))

Figure 3. Boxplot, jitter plot and histogram showing a comparison of the estimated original volumes (EOV) (cm3) obtained from different geometric formulas.


Building Figure 4

geometries_reduction$Geometry <- factor(geometries_reduction$Geometry,
                                        levels = c("Ellipsoid", "Cylinder", "Sphere", "Prism", "Cube","Experimental_data"))
geometries_reduction_error$Geometry <- factor(geometries_reduction_error$Geometry,
                                        levels = c("Experimental_data", "Ellipsoid", "Cylinder", "Sphere", "Prism", "Cube"))

real_ellipsoid<- geometries_reduction %>% filter (Geometry =="Experimental_data" | Geometry =="Ellipsoid") 
real_cylinder<- geometries_reduction %>% filter (Geometry =="Experimental_data" | Geometry =="Cylinder") 
real_sphere<- geometries_reduction %>% filter (Geometry =="Experimental_data" | Geometry =="Sphere") 
real_cube<- geometries_reduction %>% filter (Geometry =="Experimental_data" | Geometry =="Cube") 
real_prism<- geometries_reduction %>% filter (Geometry =="Experimental_data" | Geometry =="Prism") 



figure2a<-ggplot(real_ellipsoid, aes(x=Value, color=Geometry, fill= Geometry)) + 
  geom_density(alpha=.5)+ theme_classic() +xlim(0,100)+ylim(0,.05)+ theme(legend.position="bottom")+ 
  theme (text = element_text(size=14))+ scale_fill_manual(values=c("#7fbf7bff", "#1b7837ff"))+ 
  scale_color_manual(values=c("#7fbf7bff", "#1b7837ff"))+
  labs(x= "Percentage of Extracted Volume", y = "", color="", fill = "")

figure2b<-ggplot(real_cylinder, aes(x=Value, color=Geometry, fill= Geometry)) + 
  geom_density(alpha=.5)+ theme_classic() +xlim(0,100)+ ylim(0,.05)+ theme(legend.position="bottom")+ 
  theme (text = element_text(size=14))+ scale_fill_manual(values=c("#d9f0d3ff", "#1b7837ff"))+ 
  scale_color_manual(values=c("#d9f0d3ff", "#1b7837ff"))+
  labs(x= "Percentage of Extracted Volume", y = "", color="", fill = "")

figure2c<-ggplot(real_sphere, aes(x=Value, color=Geometry, fill= Geometry)) + 
  geom_density(alpha=.5)+ theme_classic() +xlim(0,100)+ ylim(0,.05)+theme(legend.position="bottom")+ 
  theme (text = element_text(size=14))+ scale_fill_manual(values=c("#af8dc3ff", "#1b7837ff"))+ 
  scale_color_manual(values=c("#af8dc3ff", "#1b7837ff"))+
  labs(x= "Percentage of Extracted Volume", y = "", color="", fill = "")


figure2d<-ggplot(real_prism, aes(x=Value, color=Geometry, fill= Geometry)) + 
  geom_density(alpha=.5)+ theme_classic() +xlim(0,100)+ ylim(0,.05)+theme(legend.position="bottom")+ 
  theme (text = element_text(size=14))+ scale_fill_manual(values=c("#e7d4e8ff", "#1b7837ff"))+ 
  scale_color_manual(values=c("#e7d4e8ff", "#1b7837ff"))+
  labs(x= "Percentage of Extracted Volume", y = "", color="", fill = "")

figure2e<-ggplot(real_cube, aes(x=Value, color=Geometry, fill= Geometry)) + 
  geom_density(alpha=.5)+ theme_classic() +xlim(0,100)+ ylim(0,.05)+ theme(legend.position="bottom")+ 
  theme (text = element_text(size=14))+ scale_fill_manual(values=c("#762a83ff","#1b7837ff"))+ 
  scale_color_manual(values=c("#762a83ff", "#1b7837ff"))+
  labs(x= "Percentage of Extracted Volume", y = "", color="", fill = "")

figure2f <- ggplot(geometries_reduction_error, aes(Geometry, Value)) +
  geom_jitter(aes(colour = factor(Geometry)), size = 4, show.legend = FALSE) +
  ylim(-200,100) +
  theme_classic() +
  scale_color_manual(values=c("#7fbf7bff","#d9f0d3ff","#e7d4e8ff","#af8dc3ff","#762a83ff")) +
  geom_hline(yintercept = 0, linetype = "dashed")

library(ggpubr)

ggarrange(figure2a,figure2b, figure2c, figure2d,figure2e,figure2f, labels = c("A", "B", "C", "D","E","F"), nrow=2, ncol=3, common.legend = FALSE)

Figure 4. A-E) Comparisons of Percentage of Extracted Volume distributions between each geometric formula and real data. F) Jitter plot illustrating individual errors, which represent the differences between individual real data points and its estimation based on each geometric formula.


Building Figure 5

# crear el gráfico para la figura A
Figure5a <- blandr.draw(Perc_extracted_volume$Ellipsoid, Perc_extracted_volume$Experimental_data) +
  theme_classic() +
  theme(plot.title = element_blank()) + # eliminar el título del gráfico
  scale_x_continuous(breaks = seq(from = 0, to = 100, by = 5), limits = c(0, 100)) +
  scale_y_continuous(breaks = seq(from = -45, to = 45, by = 5), limits = c(-45, 45))

# modificar el objeto ggplot para cambiar el color del relleno
p_build <- ggplot_build(Figure5a)
p_build$data[[12]][["fill"]] <- "#6078b0ff"
p_build$data[[13]][["fill"]] <- "#c7dd0dff"
p_build$data[[14]][["fill"]] <- "#c7dd0dff"

# guardar el objeto ggplot modificado en una nueva variable
Figure5aa <- ggplot_gtable(p_build)

# crear el gráfico para la figura B
Figure5b <- blandr.draw(Perc_extracted_volume$Cylinder, Perc_extracted_volume$Experimental_data) +
  theme_classic() +
  theme(plot.title = element_blank()) + # eliminar el título del gráfico
  scale_x_continuous(breaks = seq(from = 0, to = 100, by = 5), limits = c(0, 100)) +
  scale_y_continuous(breaks = seq(from = -45, to = 45, by = 5), limits = c(-45, 45))

# modificar el objeto ggplot para cambiar el color del relleno
p_build <- ggplot_build(Figure5b)
p_build$data[[12]][["fill"]] <- "#6078b0ff"
p_build$data[[13]][["fill"]] <- "#c7dd0dff"
p_build$data[[14]][["fill"]] <- "#c7dd0dff"

# guardar el objeto ggplot modificado en una nueva variable
Figure5bb <- ggplot_gtable(p_build)

# combinar las dos figuras en una sola
grid.arrange(Figure5aa, Figure5bb, ncol = 2, widths = c(1, 1))

Figure 5. Visualization of the Bland-Altman test for the a) ellipsoid, and b) the cylinder formulas.