Below is reported the analysis of the early Protoaurignacian assemblage A2-A1 from Fumane Cave. All steps of data analysis and visualization are reported to allow optimal reproducibility.
library(dplyr)
library(ggplot2)
library(readxl)
library(gridExtra)
library(plyr)
library(viridis)
library(survminer)
library(ggpubr)
library(readr)
library(fitdistrplus)
library(fitdistrplus)
fumane_flakes <- readxl::read_xlsx("fumane_flakes.xlsx")
fumane_cores <- readxl::read_xlsx("fumane_cores.xlsx")
# corrdimslength takes gen (generation) and median as inputs and returns the corrected length of the core by multiplying gen and median
corrdimslength<-function(gen,median){
corrected_dim_length<- (gen*median)
return(corrected_dim_length)
}
# corrdimslength2 takes gen1, gen2, and dim as inputs and returns the corrected length of the core by adding gen1, gen2, and dim.
corrdimslength2<-function(gen1,gen2, dim){
corrected_dim_length2<- (gen1+gen2)+dim
return(corrected_dim_length2)
}
# corrdims takes gen, median, and dim as inputs and returns the corrected dimensions of the core by multiplying gen and median and adding dim.
corrdims<-function(gen,median,dim){
corrected_dim<- (gen*median)+dim
return(corrected_dim)
}
# The code filters a dataset called "fumane_flakes" based on specific conditions and creates two separate subsets of data named "group1" and "group2".
fumane_flakes <- subset(fumane_flakes, !is.na(Thickness) & Group == "1")
group1 <- filter(fumane_flakes, Basic_Blank %in% c("Flake", "Tablet"))
group2 <- filter(fumane_flakes, Basic_Blank %in% c("Flake", "Blade", "Bladelet"))
#The code filters a dataset called "fumane_cores" to create a subset called "cores_final". It also transforms the "Classification_B" and "Production" columns in the "cores_final" dataframe by changing their data type and modifying their levels. Additionally, the "Volume" column values are divided by 1000.
(cores_final <- fumane_cores %>%
filter(Group == "1") %>%
mutate(Classification_B = factor(Classification_B, levels = c("Initial", "Carinated", "Narrow", "Wide", "Semicircumferential", "Multiplatform")),
Production = factor(Production, levels = c("Blade", "Blade-flake", "Blade-bladelet", "Bladelet")),
Volume = Volume / 1000))
## # A tibble: 124 × 23
## ID Unit Group Raw_material Classif…¹ Class…² Produ…³ Blank Volume Surface
## <dbl> <chr> <dbl> <chr> <chr> <fct> <fct> <chr> <dbl> <dbl>
## 1 7 A1 1 Maiolica Multi-pl… Multip… Blade-… Unde… 30.7 6408.
## 2 11 A1 1 Maiolica Narrow-s… Narrow Bladel… Flake 17.7 4251.
## 3 24 A1 1 Maiolica Semi-cir… Semici… Blade-… Block 18.7 4169.
## 4 38 A1 1 Maiolica Multi-pl… Multip… Bladel… Unde… 7.44 2312.
## 5 39 A1 1 Maiolica Multi-pl… Multip… Bladel… Unde… 18.5 4417.
## 6 42 A1 1 Maiolica Carinated Carina… Bladel… Nodu… 31.2 6110.
## 7 43 A1 1 Maiolica Narrow-s… Narrow Blade-… Nodu… 42.3 7143.
## 8 44 A1 1 Maiolica Narrow-s… Narrow Blade-… Block 14.8 3591.
## 9 52 A1 1 Maiolica Narrow-s… Narrow Bladel… Unde… 15.7 3965.
## 10 63 A1 1 Maiolica Multi-pl… Multip… Blade-… Unde… 9.54 2549.
## # … with 114 more rows, 13 more variables: `Weight (gr)` <dbl>, Co_Area <dbl>,
## # Angle <dbl>, `Scars on flaking surface` <dbl>, `Scars on platform` <dbl>,
## # Scars <dbl>, L <dbl>, W <dbl>, T <dbl>, Gen_Length_base <dbl>,
## # Gen_Length_platform <dbl>, Gen_Width <dbl>, Gen_Thickness <dbl>, and
## # abbreviated variable names ¹​Classification_A, ²​Classification_B,
## # ³​Production
# The code below calculates the mean and median of the "Thickness" variable grouped by "Group" and "Raw_material" in two separate datasets, "group1" and "group2" respectively. The results are then joined with the "cores_final" dataset.
summarymedianplatform <- group1 %>%
group_by(Raw_material) %>%
dplyr::summarise(meanplatform = mean(Thickness), medianplatform = median(Thickness))
summarymedianbase <- group2 %>%
group_by(Raw_material) %>%
dplyr::summarise(meanbase = mean(Thickness), medianbase = median(Thickness))
cores_final <- cores_final %>%
left_join(summarymedianplatform, by = c("Raw_material")) %>%
left_join(summarymedianbase, by = c("Raw_material"))
cores_final
## # A tibble: 124 × 27
## ID Unit Group Raw_material Classif…¹ Class…² Produ…³ Blank Volume Surface
## <dbl> <chr> <dbl> <chr> <chr> <fct> <fct> <chr> <dbl> <dbl>
## 1 7 A1 1 Maiolica Multi-pl… Multip… Blade-… Unde… 30.7 6408.
## 2 11 A1 1 Maiolica Narrow-s… Narrow Bladel… Flake 17.7 4251.
## 3 24 A1 1 Maiolica Semi-cir… Semici… Blade-… Block 18.7 4169.
## 4 38 A1 1 Maiolica Multi-pl… Multip… Bladel… Unde… 7.44 2312.
## 5 39 A1 1 Maiolica Multi-pl… Multip… Bladel… Unde… 18.5 4417.
## 6 42 A1 1 Maiolica Carinated Carina… Bladel… Nodu… 31.2 6110.
## 7 43 A1 1 Maiolica Narrow-s… Narrow Blade-… Nodu… 42.3 7143.
## 8 44 A1 1 Maiolica Narrow-s… Narrow Blade-… Block 14.8 3591.
## 9 52 A1 1 Maiolica Narrow-s… Narrow Bladel… Unde… 15.7 3965.
## 10 63 A1 1 Maiolica Multi-pl… Multip… Blade-… Unde… 9.54 2549.
## # … with 114 more rows, 17 more variables: `Weight (gr)` <dbl>, Co_Area <dbl>,
## # Angle <dbl>, `Scars on flaking surface` <dbl>, `Scars on platform` <dbl>,
## # Scars <dbl>, L <dbl>, W <dbl>, T <dbl>, Gen_Length_base <dbl>,
## # Gen_Length_platform <dbl>, Gen_Width <dbl>, Gen_Thickness <dbl>,
## # meanplatform <dbl>, medianplatform <dbl>, meanbase <dbl>, medianbase <dbl>,
## # and abbreviated variable names ¹​Classification_A, ²​Classification_B,
## # ³​Production
cores_final <- cores_final %>%
mutate(corrected_length_platform = corrdimslength(Gen_Length_platform, medianplatform),
corrected_length_base = corrdimslength(Gen_Length_base, medianbase),
corrected_length = corrdimslength2(corrected_length_platform, corrected_length_base, L),
corrected_width = corrdims(Gen_Width, medianbase, W),
corrected_thickness = corrdims(Gen_Thickness, medianbase,T))
# The code calculates additional variables in the "cores_final" dataset, including estimated volume, percentages of remaining and extracted volume, scar density index, logarithm of scar density index, non-cortical area, and percentage of non-cortical surface
cores_final <- cores_final %>%
mutate(est_vol = (pi * ((corrected_width/2)^2) * corrected_length) / 1000,
Perc_remaining_volume = (Volume / est_vol) * 100,
Perc_extracted_volume = 100 - Perc_remaining_volume,
SDI = Scars / Surface,
logSDI = log(SDI),
no_cortical_area = Surface - Co_Area,
perc_nco = (no_cortical_area / Surface) * 100)
coresvrm <- cores_final %>%
filter(Blank %in% c("Nodule", "Slab", "Block", "Undetermined"))
Figure6a<-ggplot(coresvrm, aes(x=Raw_material, y=Perc_extracted_volume, fill=Raw_material)) +
geom_boxplot(alpha=.7) +
geom_jitter(aes(colour=factor(Raw_material)), position=position_jitter(0.2)) +
theme_classic() +
theme(legend.position="none", text=element_text(size=14)) +
labs(x="Raw material", y="Percentage of Extracted Volume (%)") +
scale_y_continuous(breaks=seq(from=0, to=100, by=10), limits=c(0, 100)) +
scale_fill_manual(values=c("#295673ff", "#4f90a6ff", "#68abb8ff", "#84c3c9ff", "#a7dcd8ff")) +
scale_color_manual(values=c("#295673ff", "#4f90a6ff", "#68abb8ff", "#84c3c9ff", "#a7dcd8ff"))
Figure6b<- ggplot(cores_final, aes(x=Raw_material, y=SDI, fill=Raw_material)) +
geom_boxplot(alpha=.7) +
geom_jitter(aes(colour=factor(Raw_material)), position=position_jitter(0.2)) +
theme_classic() +
theme(legend.position="none", text=element_text(size=14)) +
labs(x="Raw material", y="SDI") +
scale_y_continuous(breaks = seq(from= 0, to=0.015, by=0.001), limits =c(0,0.015)) +
scale_fill_manual(values=c("#295673ff", "#4f90a6ff", "#68abb8ff", "#84c3c9ff", "#a7dcd8ff")) +
scale_color_manual(values=c("#295673ff", "#4f90a6ff", "#68abb8ff", "#84c3c9ff", "#a7dcd8ff"))
Figure6c<-ggplot(cores_final, aes(x=Raw_material, y=perc_nco, fill=Raw_material)) +
geom_boxplot(alpha=.7) +
geom_jitter(aes(colour=factor(Raw_material)), position=position_jitter(0.2)) +
theme_classic() +
theme(legend.position="none", text=element_text(size=14)) +
labs(x = "Raw material",y = "Percentage of Non cortical surface(%)") +
scale_y_continuous(breaks = seq(from= 0, to=100, by=10), limits =c(0,100)) +
scale_fill_manual(values=c("#295673ff", "#4f90a6ff", "#68abb8ff", "#84c3c9ff", "#a7dcd8ff")) +
scale_color_manual(values=c("#295673ff", "#4f90a6ff", "#68abb8ff", "#84c3c9ff", "#a7dcd8ff"))
Figure6d<-ggplot(cores_final, aes(x=Raw_material, y=Volume, fill=Raw_material)) +
geom_boxplot(alpha=.7) +
geom_jitter(aes(colour=factor(Raw_material)), position=position_jitter(0.2)) +
theme_classic() +
theme(legend.position="none", text=element_text(size=14)) +
labs(x = "Raw material",y = "Volume (cm3)") +
scale_y_continuous(breaks = seq(from= 0, to=150, by=10), limits =c(0,150)) +
scale_fill_manual(values=c("#295673ff", "#4f90a6ff", "#68abb8ff", "#84c3c9ff", "#a7dcd8ff")) +
scale_color_manual(values=c("#295673ff", "#4f90a6ff", "#68abb8ff", "#84c3c9ff", "#a7dcd8ff"))
Figure6az <- ggarrange(Figure6a, labels = "a")
Figure6bz <- ggarrange(Figure6b, labels = "b")
Figure6cz <- ggarrange(Figure6c, labels = "c")
Figure6dz <- ggarrange(Figure6d, labels = "d")
Figure6 <- grid.arrange(arrangeGrob(Figure6az, Figure6bz, ncol = 2), arrangeGrob(Figure6cz, Figure6dz, ncol = 2), nrow = 2)
#To save the figure run the code below
#ggsave("Figure 6 Raw material", plot = Figure6, width = 40, height = 40, device = "png", units = "cm", dpi = 600)
Figure 6. Boxplot and jitter plot showing the results for a) PEV obtained through VRM, b) SDI) c) Percentage of non-cortical surface, and d) volume of the cores according to raw material type.
# The code calculates summary statistics for the variable "Perc_extracted_volume" grouped by "Raw_material" using the ddply() function from the plyr package. The resulting summary includes the number of observations (N), mean, median, minimum (Min), maximum (Max), standard deviation (SD), and coefficient of variation (CV). The summary results are stored in the object "Summary_results" and displayed.
Summary_results <- ddply(coresvrm, .(Raw_material), summarise,
N = length(Perc_extracted_volume),
Mean = round(mean(Perc_extracted_volume), 2),
Median = round(median(Perc_extracted_volume), 2),
Min = round(min(Perc_extracted_volume), 2),
Max = round(max(Perc_extracted_volume), 2),
SD = round(sd(Perc_extracted_volume), 2),
CV = round(sd(Perc_extracted_volume) / mean(Perc_extracted_volume), 2))
Summary_results
## Raw_material N Mean Median Min Max SD CV
## 1 Maiolica 72 76.27 79.06 45.82 89.26 9.96 0.13
## 2 Other 2 88.02 88.02 86.38 89.67 2.32 0.03
## 3 S_Rossa 6 77.56 74.86 68.27 89.32 8.33 0.11
## 4 S_Variegata 8 67.40 71.47 25.99 87.03 19.67 0.29
## 5 S_Variegata 3 15 71.45 77.12 26.14 88.57 16.32 0.23
#The code performs the Kruskal-Wallis tests to assess the equality of medians among different groups for the variables "Perc_extracted_volume," "SDI," and "perc_nco" based on the levels of the "Raw_material" column in the respective datasets "coresvrm" and "cores_final."
kruskal.test(Perc_extracted_volume ~ Raw_material, data = coresvrm)
##
## Kruskal-Wallis rank sum test
##
## data: Perc_extracted_volume by Raw_material
## Kruskal-Wallis chi-squared = 7.0878, df = 4, p-value = 0.1313
kruskal.test(SDI ~ Raw_material, data = cores_final)
##
## Kruskal-Wallis rank sum test
##
## data: SDI by Raw_material
## Kruskal-Wallis chi-squared = 3.3052, df = 4, p-value = 0.5081
kruskal.test(perc_nco ~ Raw_material, data = cores_final)
##
## Kruskal-Wallis rank sum test
##
## data: perc_nco by Raw_material
## Kruskal-Wallis chi-squared = 12.101, df = 4, p-value = 0.01661
dummymat <- data.frame(Raw_material = c("Maiolica", "Other", "S_Rossa", "S_Variegata", "S_Variegata 3"),
Perc_extracted_volume = rep(100, 5))
reduction_cores <- cores_final[c(4, 35)]
weibull_cores <- rbind(reduction_cores, dummymat)
materials <- unique(weibull_cores$Raw_material)
weibull_results <- data.frame(Raw_material = character(),
shape = numeric(),
scale = numeric(),
stringsAsFactors = FALSE)
for (material in materials) {
subset_data <- filter(weibull_cores, Raw_material == material)
if (nrow(subset_data) > 1) {
w <- suppressWarnings(fitdist(subset_data$Perc_extracted_volume, "weibull"))
weibull_results <- rbind(weibull_results, data.frame(Raw_material = material,
shape = w$estimate[1],
scale = w$estimate[2]))
}
}
weibull_results
## Raw_material shape scale
## shape Maiolica 8.346369 79.62398
## shape1 Other 16.538963 94.85905
## shape2 S_Variegata 3.836945 75.44261
## shape3 S_Variegata 3 5.792190 78.67538
## shape4 S_Rossa 7.887498 85.56734
km_rm <- survfit(Surv(Perc_extracted_volume) ~ Raw_material , data = cores_final)
Figure7 <- ggsurvplot(
fit = km_rm,
risk.table = FALSE,
pval = FALSE,
conf.int = FALSE,
xlim = c(0, 100),
break.time.by = 5,
ggtheme = theme_classic(),
risk.table.y.text.col = T,
risk.table.y.text = FALSE,
surv.median.line = "hv",
palette = c("#295673ff", "#4f90a6ff", "#68abb8ff", "#84c3c9ff", "#a7dcd8ff") # Specify desired colors
)
Figure7
Figure 7. Kaplan Meier plot showing the survival probability of Fumane Cave cores by raw material along the reduction continuum (i.e., Percentage of Extracted Volume), predicted by the VRM.
Figure8a<-ggplot(coresvrm, aes(x=Classification_B, y=Perc_extracted_volume, fill=Classification_B)) +
geom_boxplot(alpha=.7) +
geom_jitter(aes(colour=factor(Classification_B)), position=position_jitter(0.2)) +
theme_classic() +
theme(legend.position="none", text=element_text(size=14)) +
labs(x="Core type", y="Percentage of Extracted Volume (%)") +
scale_y_continuous(breaks=seq(from=0, to=100, by=10), limits=c(0, 100)) +
scale_fill_manual(values=c("#52499fff", "#63589fff", "#826dbaff", "#9f82ceff", "#b998ddff", "#d1afe8ff")) +
scale_color_manual(values=c("#52499fff","#63589fff", "#826dbaff", "#9f82ceff", "#b998ddff", "#d1afe8ff"))
Figure8b<- ggplot(cores_final, aes(x=Classification_B, y=SDI, fill=Classification_B)) +
geom_boxplot(alpha=.7) +
geom_jitter(aes(colour=factor(Classification_B)), position=position_jitter(0.2)) +
theme_classic() +
theme(legend.position="none", text=element_text(size=14)) +
labs(x="Core type", y="SDI") +
scale_y_continuous(breaks = seq(from= 0, to=0.015, by=0.001), limits =c(0,0.015)) +
scale_fill_manual(values=c("#52499fff", "#63589fff", "#826dbaff", "#9f82ceff", "#b998ddff", "#d1afe8ff")) +
scale_color_manual(values=c("#52499fff","#63589fff", "#826dbaff", "#9f82ceff", "#b998ddff", "#d1afe8ff"))
Figure8c<-ggplot(cores_final, aes(x=Classification_B, y=perc_nco, fill=Classification_B)) +
geom_boxplot(alpha=.7) +
geom_jitter(aes(colour=factor(Classification_B)), position=position_jitter(0.2)) +
theme_classic() +
theme(legend.position="none", text=element_text(size=14)) +
labs(x = "Core type",y = "Percentage of Non cortical surface(%)") +
scale_y_continuous(breaks = seq(from= 0, to=100, by=10), limits =c(0,100)) +
scale_fill_manual(values=c("#52499fff", "#63589fff", "#826dbaff", "#9f82ceff", "#b998ddff", "#d1afe8ff")) +
scale_color_manual(values=c("#52499fff","#63589fff", "#826dbaff", "#9f82ceff", "#b998ddff", "#d1afe8ff"))
Figure8d<-ggplot(cores_final, aes(x=Classification_B, y=Volume, fill=Classification_B)) +
geom_boxplot(alpha=.7) +
geom_jitter(aes(colour=factor(Classification_B)), position=position_jitter(0.2)) +
theme_classic() +
theme(legend.position="none", text=element_text(size=14)) +
labs(x = "Core type",y = "Volume (cm3)") +
scale_y_continuous(breaks = seq(from= 0, to=150, by=10), limits =c(0,150)) +
scale_fill_manual(values=c("#52499fff", "#63589fff", "#826dbaff", "#9f82ceff", "#b998ddff", "#d1afe8ff")) +
scale_color_manual(values=c("#52499fff","#63589fff", "#826dbaff", "#9f82ceff", "#b998ddff", "#d1afe8ff"))
Figure8az<-ggarrange(Figure8a, labels = "a")
Figure8bz<-ggarrange(Figure8b, labels = "b")
Figure8cz<-ggarrange(Figure8c, labels = "c")
Figure8dz<-ggarrange(Figure8d, labels = "d")
Figure8<-grid.arrange(arrangeGrob(Figure8az,Figure8bz, ncol=2),arrangeGrob(Figure8cz,Figure8dz, ncol=2), nrow= 2)
#To save the figure run the code below
#ggsave("Figure 8 Core Type", plot = Figure8, width=40, height = 40, device = "png", units = "cm",dpi=600)
Figure 8. Boxplot and jitter plot showing the results for a) PEV obtained through VRM, b) SDI) c) Percentage of non-cortical surface, and d) volume of the cores according to raw core type.
Figure9a<-ggplot(coresvrm, aes(x=Production, y=Perc_extracted_volume, fill=Production)) +
geom_boxplot(alpha=.7) +
geom_jitter(aes(colour=factor(Production)), position=position_jitter(0.2)) +
theme_classic() +
theme(legend.position="none", text=element_text(size=14)) +
labs(x="Production", y="Percentage of Extracted Volume (%)") +
scale_y_continuous(breaks=seq(from=0, to=100, by=10), limits=c(0, 100)) +
scale_fill_manual(values=c("#ff6600ff", "#ff7f2aff", "#45708eff", "#4570abff")) +
scale_color_manual(values=c("#ff6600ff", "#ff7f2aff", "#45708eff", "#4570abff"))
Figure9b<- ggplot(cores_final, aes(x=Production, y=SDI, fill=Production)) +
geom_boxplot(alpha=.7) +
geom_jitter(aes(colour=factor(Production)), position=position_jitter(0.2)) +
theme_classic() +
theme(legend.position="none", text=element_text(size=14)) +
labs(x="Production", y="SDI") +
scale_y_continuous(breaks = seq(from= 0, to=0.015, by=0.001), limits =c(0,0.015)) +
scale_fill_manual(values=c("#ff6600ff", "#ff7f2aff", "#45708eff", "#4570abff")) +
scale_color_manual(values=c("#ff6600ff", "#ff7f2aff", "#45708eff", "#4570abff"))
Figure9c<-ggplot(cores_final, aes(x=Production, y=perc_nco, fill=Production)) +
geom_boxplot(alpha=.7) +
geom_jitter(aes(colour=factor(Production)), position=position_jitter(0.2)) +
theme_classic() +
theme(legend.position="none", text=element_text(size=14)) +
labs(x = "Production",y = "Percentage of Non cortical surface(%)") +
scale_y_continuous(breaks = seq(from= 0, to=100, by=10), limits =c(0,100)) +
scale_fill_manual(values=c("#ff6600ff", "#ff7f2aff", "#45708eff", "#4570abff")) +
scale_color_manual(values=c("#ff6600ff", "#ff7f2aff", "#45708eff", "#4570abff"))
Figure9d<-ggplot(cores_final, aes(x=Production, y=Volume, fill=Production)) +
geom_boxplot(alpha=.7) +
geom_jitter(aes(colour=factor(Production)), position=position_jitter(0.2)) +
theme_classic() +
theme(legend.position="none", text=element_text(size=14)) +
labs(x = "Production",y = "Volume (cm3)") +
scale_y_continuous(breaks = seq(from= 0, to=150, by=10), limits =c(0,150)) +
scale_fill_manual(values=c("#ff6600ff", "#ff7f2aff", "#45708eff", "#4570abff")) +
scale_color_manual(values=c("#ff6600ff", "#ff7f2aff", "#45708eff", "#4570abff"))
Figure9az<-ggarrange(Figure9a, labels = "a")
Figure9bz<-ggarrange(Figure9b, labels = "b")
Figure9cz<-ggarrange(Figure9c, labels = "c")
Figure9dz<-ggarrange(Figure9d, labels = "d")
Figure9<-grid.arrange(arrangeGrob(Figure9az,Figure9bz, ncol=2),arrangeGrob(Figure9cz,Figure9dz, ncol=2), nrow= 2)
Figure 9. Boxplot and jitter plot showing the results for a) PEV obtained through VRM, b) SDI) c) Percentage of non-cortical surface, and d) volume of the cores according to blank production.
gg_tbl1 <- cores_final %>%
dplyr::group_by(Raw_material) %>%
dplyr::summarise(total_count = dplyr::n(), .groups = 'drop')
4676/91
## [1] 51.38462
517/2
## [1] 258.5
397/6
## [1] 66.16667
621/9
## [1] 69
468/16
## [1] 29.25