Data preparation

Import of the Findings data including time slice and location

library(magrittr)
library(knitr)
library(kableExtra)
library(readxl)
library(dplyr)
site_quality <- read_excel(
  path = "../data/raw_data/tables/B1Daten_A2Daten_fertig.xlsx",
  sheet = "codes") %>% 
  dplyr::select("site quality") %>% 
  mutate(code = unlist(Map(function(x){x[1]},x=strsplit(`site quality`," - ")))) %>% 
  mutate(quality = unlist(Map(function(x){x[2]},x=strsplit(`site quality`," - ")))) %>% 
  dplyr::select(-"site quality")

time_slice <- read_excel(
  path = "../data/raw_data/tables/B1Daten_A2Daten_fertig.xlsx",
  sheet = "codes") %>% 
  dplyr::select(`Time slice`) %>% 
  mutate(code = unlist(Map(function(x){x[1]},x=strsplit(`Time slice`," - ")))) %>% 
  mutate(time = unlist(Map(function(x){x[2]},x=strsplit(`Time slice`," - ")))) %>% 
  dplyr::select(-`Time slice`)

b1data <- read_excel(
  path = "../data/raw_data/tables/B1Daten_A2Daten_fertig.xlsx",
  sheet = "B1Daten_A2Daten_fertig") %>% 
  mutate(Time_slice = time_slice$time[match(`Time slice`,time_slice$code)]) %>% 
  mutate(Site_quality = site_quality$quality[match(`Site quality`,site_quality$code)]) %>% 
  mutate(x = as.numeric(Ost)) %>% 
  mutate(y = as.numeric(Nord)) %>% 
  dplyr::select(-Ost,-Nord,-`Time slice`,-`Site quality`) %>% 
  dplyr::filter(Site_quality != "single find") %>% 
  left_join(.,tibble(Time_slice = c("Ahrensburgian (appr. Younger Dryas, GS-1, c. 10,770-9,670 cal. BCE)",
                                    "Federmesser-Gruppen (FMG)",
                                    "Brommean",
                                    "classic Hamburgian",
                                    "Federmesser-Gruppen (FMG) / Brommean (appr. mid-late Lateglacial Interstadial, GI-1c-a, c. 11,970-10,770 cal. BCE)",
                                    "Hamburgian (appr. early Lateglacial Interstadial, GI-1e+d, c. 12,690-11,970 cal. BCE)",
                                    "Havelte Group"),
                     Time = c("Reindeerhunter",
                              "Elkhunter",
                              "Elkhunter",
                              "Reindeerhunter",
                              "Elkhunter",
                              "Reindeerhunter",
                              "Reindeerhunter")),
            by = "Time_slice")

b1data
#> # A tibble: 104 x 8
#>    la_id fs_id  Gemeinde  Time_slice      Site_quality        x     y Time 
#>    <chr> <chr>  <chr>     <chr>           <chr>           <dbl> <dbl> <chr>
#>  1 1     51026~ Eggstedt  Ahrensburgian ~ excavation/sys~  9.28  54.1 Rein~
#>  2 3     53008~ Behlendo~ Ahrensburgian ~ collection of ~ 10.6   53.7 Rein~
#>  3 4     53016~ Brunsmark Ahrensburgian ~ collection of ~ 10.7   53.6 Rein~
#>  4 4     53016~ Brunsmark Federmesser-Gr~ collection of ~ 10.7   53.6 Elkh~
#>  5 4     53016~ Brunsmark Federmesser-Gr~ collection of ~ 10.7   53.6 Elkh~
#>  6 5     56029~ Klein No~ Ahrensburgian ~ collection of ~  9.69  53.7 Rein~
#>  7 6     56029~ Klein No~ Federmesser-Gr~ excavation/sys~  9.69  53.7 Elkh~
#>  8 7     56029~ Klein No~ Ahrensburgian ~ collection of ~  9.69  53.7 Rein~
#>  9 7     56029~ Klein No~ Federmesser-Gr~ collection of ~  9.69  53.7 Elkh~
#> 10 8     56029~ Klein No~ Ahrensburgian ~ collection of ~  9.68  53.7 Rein~
#> # ... with 94 more rows

Creation of spatial Simple Feature and export as Geopackage

library(sf)
library(mapview)
library(raster)

b1data_sp <- st_as_sf(b1data, coords = c("x", "y"), crs = 4326)

st_write(b1data_sp,"../data/derived_data/vector/b1_locations.gpkg", "b1_locations", layer_options = c("OVERWRITE=yes"))
#> Updating layer `b1_locations' to data source `../data/derived_data/vector/b1_locations.gpkg' using driver `GPKG'
#> options:        OVERWRITE=yes 
#> features:       104
#> fields:         6
#> geometry type:  Point

m2 <- mapview(b1data_sp, legend = FALSE)
mapshot(m2, file = "../plots/arch_dat_plot.png")
grid::grid.raster(png::readPNG("../plots/arch_dat_plot.png"))

Calculate mean coordinate for points of the same time (Reindeerhunter or Elkhunter) which are closer than 1000 meters

b1data_sp_centroids <- do.call("rbind",Map(function(timeunit){
  b1data_sp_time <- st_transform(b1data_sp, 25832) %>% 
    dplyr::filter(Time == timeunit)
  #dplyr::filter(Time_slice == timeunit)
  
  clustbydist <- hclust(dist(data.frame(rownames=rownames(b1data_sp_time), 
                                        x=st_coordinates(b1data_sp_time)[,1],
                                        y=st_coordinates(b1data_sp_time)[,2])), 
                        method="complete")
  
  # Define distance in meters
  b1data_sp_time$Clust <- cutree(clustbydist, h=1000) 
  
  # Aggregate points by taking mean x and mean y coordinate
  b1data_sp_centroids <-st_as_sf(
    as_tibble(do.call("rbind",
                      Map(
                        function(x){
                          apply(
                            st_coordinates(b1data_sp_time %>% 
                                             filter(Clust == x)),
                            2,
                            mean)},
                        x=unique(b1data_sp_time$Clust)))),
    coords = c("X", "Y"), crs = 25832)
  b1data_sp_centroids <- st_transform(b1data_sp_centroids, 4326)
  
  b1data_sp_centroids$Time <- timeunit
  
  return(b1data_sp_centroids)
},timeunit=unique(b1data_sp$Time)))

st_write(b1data_sp_centroids,
         "../data/derived_data/vector/b1_locations_centroid.gpkg", 
         "b1_locations_centroid", 
         layer_options = c("OVERWRITE=yes"))
#> Updating layer `b1_locations_centroid' to data source `../data/derived_data/vector/b1_locations_centroid.gpkg' using driver `GPKG'
#> options:        OVERWRITE=yes 
#> features:       68
#> fields:         1
#> geometry type:  Point

Calculate mean coordinate for points of the same time slice which are closer than 1000 meters

b1data_sp_centroids <- do.call("rbind",Map(function(timeunit){
  b1data_sp_time <- st_transform(b1data_sp, 25832) %>% 
    dplyr::filter(Time_slice == timeunit)
  
  clustbydist <- hclust(dist(data.frame(rownames=rownames(b1data_sp_time), 
                                        x=st_coordinates(b1data_sp_time)[,1],
                                        y=st_coordinates(b1data_sp_time)[,2])), 
                        method="complete")
  
  # Define distance in meters
  b1data_sp_time$Clust <- cutree(clustbydist, h=1000) 
  
  # Aggregate points by taking mean x and mean y coordinate
  b1data_sp_centroids <-st_as_sf(
    as_tibble(do.call("rbind",
                      Map(
                        function(x){
                          apply(
                            st_coordinates(b1data_sp_time %>% 
                                             filter(Clust == x)),
                            2,
                            mean)},
                        x=unique(b1data_sp_time$Clust)))),
    coords = c("X", "Y"), crs = 25832)
  b1data_sp_centroids <- st_transform(b1data_sp_centroids, 4326)
  
  b1data_sp_centroids$Time <- timeunit
  
  return(b1data_sp_centroids)
},timeunit=unique(b1data_sp$Time_slice)))

st_write(b1data_sp_centroids,
         "../data/derived_data/vector/b1_locations_centroid_Time_slice.gpkg", 
         "b1_locations_centroid_Time_slice", 
         layer_options = c("OVERWRITE=yes"))
#> Updating layer `b1_locations_centroid_Time_slice' to data source `../data/derived_data/vector/b1_locations_centroid_Time_slice.gpkg' using driver `GPKG'
#> options:        OVERWRITE=yes 
#> features:       80
#> fields:         1
#> geometry type:  Point