# packages
library(metaDigitise)
library(magrittr)
library(tibble)
library(dplyr)
library(lubridate)
library(dpeatdecomposition)
library(dm)
library(RMariaDB)
Connect to database
# connect to database
con <-
RMariaDB::dbConnect(
drv = RMariaDB::MariaDB(),
dbname = "dpeatdecomposition",
default.file = "~/my.cnf"
)
# get database as dm object
dm_dpeatdecomposition <-
dpeatdecomposition::dp_get_dm(con, learn_keys = TRUE)
Get most current IDs
id_last <-
list(
id_dataset =
dm_dpeatdecomposition %>%
dm::pull_tbl(datasets) %>%
dplyr::pull(id_dataset) %>%
tail(1),
id_sample =
dm_dpeatdecomposition %>%
dm::pull_tbl(samples) %>%
dplyr::pull(id_sample) %>%
tail(1),
id_measurement =
dm_dpeatdecomposition %>%
dm::pull_tbl(data) %>%
dplyr::pull(id_measurement) %>%
tail(1)
) %>%
purrr::map(function(.x) {
if(length(.x) == 0) {
0L
} else {
.x
}
})
Create directories
dir_name <- "d20"
dir_source <- "../raw_data/data/d20"
dir_target <- paste0("../derived_data/", id_last$id_dataset + 1L)
if(!dir.exists(dir_target)) {
dir.create(dir_target)
}
datasets <-
tibble::tibble(
id_dataset = id_last$id_dataset + 1L
)
citations_to_datasets <-
dplyr::bind_rows(
db_template_tables$citations_to_datasets,
tibble::tibble(
id_dataset = datasets$id_dataset,
id_citation = c("Prevost.1997")
)
)
# mass remaining
samples2 <-
dplyr::bind_rows(
readRDS(paste0(dir_source, "/raw/caldat/Prevost.1997-Fig7a"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Prevost.1997-Fig7b"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Prevost.1997-Fig7c"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Prevost.1997-Fig7d"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Prevost.1997-Fig7e"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Prevost.1997-Fig7f"))$processed_data
) %>%
dplyr::mutate(
id =
id %>%
stringr::str_replace(pattern = "treatmentontrol", replacement = "treatmentcontrol"), #---note: correct id for some samples
incubation_duration =
purrr::map_int(x, function(.x) {
which.min(abs(.x - c(0:3))) - 1L
}) %>%
as.numeric(),
mass_relative_mass = (100 - y)/100,
mass_absolute = NA_real_,
sample_depth_upper =
dplyr::case_when(
stringr::str_detect(id, "depth10") ~ 10,
stringr::str_detect(id, "depth30") ~ 30
),
sample_depth_lower = sample_depth_upper,
taxon_rank_value =
dplyr::case_when(
stringr::str_detect(id, "Sphagnum") ~ "Sphagnum",
stringr::str_detect(id, "Betula") ~ "Betula papyrifera",
stringr::str_detect(id, "cellulose") ~ NA_character_,
),
taxon_rank_name =
dplyr::case_when(
stringr::str_detect(id, "Sphagnum") ~ "genus",
stringr::str_detect(id, "Betula") ~ "species",
stringr::str_detect(id, "cellulose") ~ NA_character_,
),
sample_type =
dplyr::case_when(
stringr::str_detect(id, "Sphagnum") ~ "peat",
stringr::str_detect(id, "Betula") ~ "litter",
stringr::str_detect(id, "cellulose") ~ "cellulose",
),
sample_type2 =
dplyr::case_when(
stringr::str_detect(id, "Sphagnum") ~ NA_character_,
stringr::str_detect(id, "Betula") ~ "Betula papyrifera (non-peatland species) wood sticks of 0.6 cm diameter",
stringr::str_detect(id, "cellulose") ~ "Whatman no. 1 filter papers, 9-cm in diameter",
),
taxon_organ =
dplyr::case_when(
stringr::str_detect(id, "Sphagnum") ~ "whole plant",
stringr::str_detect(id, "Betula") ~ "branches",
stringr::str_detect(id, "cellulose") ~ NA_character_,
),
distance_to_ditch =
id %>%
stringr::str_extract(pattern = "distance\\d+$") %>%
stringr::str_extract(pattern = "\\d+$") %>%
as.numeric(),
sample_treatment =
dplyr::case_when(
stringr::str_detect(id, pattern = "control") ~ "control",
TRUE ~ "drainage"
),
sampling_date =
as.Date("1991-10-15") + lubridate::years(incubation_duration),
incubation_duration =
incubation_duration %>%
lubridate::dyears() %>%
lubridate::time_length(unit = "days"),
sampling_year = lubridate::year(sampling_date),
sampling_month = lubridate::month(sampling_date),
sampling_longitude =
"69°15'W" %>%
sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
as.numeric(),
sampling_latitude =
"47°49'N" %>%
sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
as.numeric(),
is_incubated = TRUE,
incubation_environment = "peat",
id_dataset = id_last$id_dataset + 1,
mesh_size_absolute = 1,
experimental_design =
paste0(
as.numeric(as.factor(sample_treatment)), "_",
as.numeric(as.factor(ifelse(is.na(distance_to_ditch), "a", distance_to_ditch)))
),
comment_samples = "Coordinates are only the approximate location of the study site, but not sampling points.",
comment_samples =
paste0(
comment_samples,
dplyr::case_when(
stringr::str_detect(id, "control") ~ " treatment = 'control' means that these litter bags were buried in a location next to the drained areas of the peatland.",
TRUE ~ ""
)
)
) %>%
dplyr::select(-x, -y, -id, -group, -col, -pch, -y_variable, -x_variable)
# initial masses
samples1 <-
samples2 %>%
dplyr::filter(!duplicated(paste0(experimental_design, "_", sample_type, "_", sample_depth_upper))) %>%
dplyr::mutate(
mass_relative_mass = 1.0,
mass_remaining_error = 0.0,
incubation_duration = 0.0,
sampling_year = 1991,
sampling_month = 10,
sampling_date = as.Date("1991-10-15"),
id_sample = seq_len(nrow(.)) + id_last$id_sample,
id_sample_parent = id_sample,
id_sample_origin = id_sample,
id_sample_incubation_start = id_sample
)
samples2 <-
samples2 %>%
dplyr::mutate(
id_sample = seq_len(nrow(.)) + max(samples1$id_sample),
id_sample_origin =
dplyr::left_join(
samples2 %>% dplyr::select(experimental_design, sample_type, sample_depth_upper),
samples1 %>% dplyr::select(experimental_design, sample_type, sample_depth_upper, id_sample),
by = c("experimental_design", "sample_type", "sample_depth_upper")
) %>%
dplyr::pull(id_sample),
id_sample_incubation_start = id_sample_origin,
id_sample_parent =
purrr::map_int(seq_len(nrow(.)), function(i) {
index <- paste0(experimental_design, "_", sample_type, "_", sample_depth_upper) == paste0(experimental_design, "_", sample_type, "_", sample_depth_upper)[[i]] & incubation_duration < incubation_duration[[i]]
if(! any(index)) {
id_sample_origin[[i]]
} else {
target_incubation_duration <- max(incubation_duration[index])
index <- index & incubation_duration == target_incubation_duration
id_sample[index]
}
})
)
# water table depth and peat subsidence
samples3 <-
dplyr::bind_rows(
readRDS(paste0(dir_source, "/raw/caldat/Prevost.1997-Fig2k"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Prevost.1997-Fig2l"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Prevost.1997-Fig2m"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Prevost.1997-Fig2n"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Prevost.1997-Fig2o"))$processed_data
) %>%
dplyr::mutate(
distance_to_ditch = c(rep(c(0, 10, 20), 4), rep(rep(c(0, 1.5, 5, 10, 15, 20), 4), 4)),
sample_treatment =
dplyr::case_when(
stringr::str_detect(id, pattern = "control") ~ "control",
TRUE ~ "drainage"
),
sampling_year =
id %>%
stringr::str_extract(pattern = "year\\d+$") %>%
stringr::str_remove(pattern = "^year") %>%
as.numeric(),
variable =
dplyr::case_when(
stringr::str_detect(id, "peat") ~ "peat_surface_depth", #---note: I think, based on Fig. 2, that this is the depth of the peat surface relative to the respective peat surface at the left point of each drainage transect (before drainage). This means that absolute peat heights are meaningless and therefore I record in the final version only peat height changes and water table depths
stringr::str_detect(id, "min") ~ "minimum_water_table_depth",
stringr::str_detect(id, "max") ~ "maximum_water_table_depth",
stringr::str_detect(id, "mean") ~ "mean_water_table_depth"
)
) %>%
dplyr::arrange(sampling_year, distance_to_ditch) %>%
dplyr::mutate(
index = paste0(sampling_year, "_", distance_to_ditch),
water_table_depth =
purrr::map(unique(index), function(.x) {
c(NA_real_, y[index == .x][-1] - y[index == .x][[1]])
}) %>%
unlist(),
index = paste0(variable, "_", distance_to_ditch)
) %>%
dplyr::arrange(index) %>%
dplyr::mutate(
peat_subsidence_relative_to_1990 =
purrr::map(unique(index), function(.x) {
if(stringr::str_detect(.x, pattern = "peat_surface")) {
c(0, y[index == .x][-1] - y[index == .x][[1]])
} else {
rep(NA_real_, sum(index == .x))
}
}) %>%
unlist(),
sampling_longitude =
"69°15'W" %>%
sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
as.numeric(),
sampling_latitude =
"47°49'N" %>%
sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
as.numeric(),
sample_type = "peat",
sample_depth_upper = 0,
sample_depth_lower = 0,
id_dataset = datasets$id_dataset[[1]],
id_sample = seq_len(nrow(.)) + max(samples2$id_sample),
id_sample_origin = id_sample,
id_sample_parent = id_sample,
is_incubated = FALSE,
incubation_duration = 0.0,
comment_samples = "Coordinates are only the approximate location of the study site, but not sampling points.",
comment_samples =
paste0(
comment_samples,
dplyr::case_when(
stringr::str_detect(id, "peat") ~ " Peat subsidence values are relative to the height of the peat at the same location in 1990.",
!stringr::str_detect(id, "peat") ~ " Water table depths are summary statistics (minimum, mean, or maximum) of multiple measurements in the sampling year."
)
),
comment_samples =
paste0(
comment_samples,
dplyr::case_when(
stringr::str_detect(id, "control") & !stringr::str_detect(id, "peat") ~ " treatment = 'control' means that this are the same locations in the peatland, but before drainage.",
TRUE ~ ""
)
)
) %>%
dplyr::select(-x, -y, -id, -group, -col, -pch, -y_variable, -x_variable, -index) %>%
dplyr::filter((sample_treatment == "drainage" & distance_to_ditch %in% na.omit(samples2$distance_to_ditch)) | (sample_treatment == "control" & distance_to_ditch == 0)) %>%
dplyr::mutate(
distance_to_ditch =
dplyr::case_when(
sample_treatment == "drainage" ~ distance_to_ditch,
TRUE ~ NA_real_
)
) %>%
dplyr::left_join(
samples2 %>%
dplyr::filter(!duplicated(experimental_design)) %>%
dplyr::select(sample_treatment, distance_to_ditch, experimental_design),
by = c("sample_treatment", "distance_to_ditch")
) %>%
dplyr::filter(variable == "mean_water_table_depth")
## combine
samples <-
dplyr::bind_rows(
db_template_tables$samples,
samples1 %>%
dplyr::mutate(
type = "samples1"
),
samples2 %>%
dplyr::mutate(
type = "samples2"
),
samples3 %>%
dplyr::mutate(
type = "samples3"
)
)
samples_to_samples <-
samples %>%
dplyr::filter(! id_sample %in% id_sample_origin) %>%
dplyr::mutate(
transition_description =
dplyr::case_when(
type %in% c("samples2") ~ "wait",
TRUE ~ NA_character_
)
) %>%
dplyr::select(id_sample_parent, id_sample, transition_description) %>%
dplyr::rename(
id_sample_child = "id_sample"
)
d1 <-
dplyr::bind_rows(
samples1, samples2
) %>%
tidyr::pivot_longer(
cols = dplyr::all_of(c("mass_absolute", "mass_relative_mass", "mesh_size_absolute")),
names_to = "attribute_name",
values_to = "value"
) %>%
dplyr::mutate(
id_measurement = seq_len(nrow(.)) + id_last$id_measurement,
id_measurement_numerator =
purrr::map_int(seq_len(nrow(.)), function(i) {
switch(
attribute_name[[i]],
"mass_relative_mass" = {
id_measurement[id_sample == id_sample[[i]] & attribute_name == "mass_absolute"]
},
NA_integer_
)
}),
id_measurement_denominator =
purrr::map_int(seq_len(nrow(.)), function(i) {
switch(
attribute_name[[i]],
"mass_relative_mass" = {
id_measurement[id_sample == id_sample_parent[[i]] & attribute_name == "mass_absolute"]
},
NA_integer_
)
}),
value_type =
dplyr::case_when(
attribute_name == "mesh_size_absolute" ~ "point",
TRUE ~ "mean"
),
error_type =
dplyr::case_when(
attribute_name %in% c("mesh_size_absolute", "mass_absolute") ~ NA_character_,
TRUE ~ "se"
),
sample_size =
dplyr::case_when(
attribute_name %in% c("mesh_size_absolute") ~ NA_integer_,
TRUE ~ 5L
)
)
d3 <-
samples3 %>%
tidyr::pivot_longer(
cols = dplyr::all_of(c("water_table_depth")),
names_to = "attribute_name",
values_to = "value"
) %>%
dplyr::mutate(
id_measurement = seq_len(nrow(.)) + max(d1$id_measurement),
value_type = "mean",
sample_size = NA_integer_
)
# combine
d <-
dplyr::bind_rows(
db_template_tables$data,
d1,
d3
) %>%
dplyr::select(dplyr::all_of(colnames(db_template_tables$data)))
experimental_design_format <-
tibble::tibble(
id_dataset = datasets$id_dataset,
file = paste0(id_last$id_dataset + 1L, "/experimental_design_format.csv"),
experimental_design_description = "`sample_treatment`: A label for the drainage conditions. `distance_to_drainage_ditch`: The distance [m] to the next drainage ditch."
)
# csv file to export
experimental_design_format2 <-
samples %>%
dplyr::filter(! is.na(experimental_design)) %>%
dplyr::filter(! duplicated(experimental_design)) %>%
dplyr::select(experimental_design, sample_treatment, distance_to_ditch)
# export
write.csv(experimental_design_format2, paste0(dir_target, "/experimental_design_format.csv"), row.names = FALSE)
# list all tables
dm_insert_in <-
list(
datasets =
datasets %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$datasets))),
samples =
samples %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$samples))),
data =
d %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$data))),
samples_to_samples =
samples_to_samples %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$samples_to_samples))),
citations_to_datasets =
citations_to_datasets %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$citations_to_datasets))),
experimental_design_format =
experimental_design_format %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$experimental_design_format)))
)
# check whether all column names as present in table attributes
all_column_names <-
purrr::map(dm_insert_in, colnames) %>%
unlist() %>%
unique()
if(! all(all_column_names %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))) {
cond <- purrr::map_lgl(all_column_names, function(.x) ! .x %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))
RMariaDB::dbDisconnect(con)
stop(paste0("New `attribute_name`s discovered: ", paste(all_column_names[cond], collapse = ", ")))
}
all_data_attributes <- unique(dm_insert_in$data$attribute_name)
if(! all(all_data_attributes %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))) {
RMariaDB::dbDisconnect(con)
cond <- purrr::map_lgl(all_data_attributes, function(.x) ! .x %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))
stop(paste0("New `attribute_name`s discovered: ", paste(all_data_attributes[cond], collapse = ", ")))
}
# filter empty tables
dm_insert_in_check <-
dm_insert_in[purrr::map_lgl(dm_insert_in, function(x) nrow(x) > 0)] %>%
dm::as_dm() %>%
dp_dm_add_keys(dm_dpeatdecomposition)
# copy into dm_pmird
for(i in seq_along(dm_insert_in)) {
RMariaDB::dbAppendTable(con, name = names(dm_insert_in)[[i]], value = dm_insert_in[[i]])
}
RMariaDB::dbDisconnect(con)