# packages
library(metaDigitise)
library(magrittr)
library(tibble)
library(dplyr)
library(lubridate)
library(dpeatdecomposition)
library(dm)
library(RMariaDB)
Connect to database
# connect to database
con <-
RMariaDB::dbConnect(
drv = RMariaDB::MariaDB(),
dbname = "dpeatdecomposition",
default.file = "~/my.cnf"
)
# get database as dm object
dm_dpeatdecomposition <-
dpeatdecomposition::dp_get_dm(con, learn_keys = TRUE)
Get most current IDs
id_last <-
list(
id_dataset =
dm_dpeatdecomposition %>%
dm::pull_tbl(datasets) %>%
dplyr::pull(id_dataset) %>%
tail(1),
id_sample =
dm_dpeatdecomposition %>%
dm::pull_tbl(samples) %>%
dplyr::pull(id_sample) %>%
tail(1),
id_measurement =
dm_dpeatdecomposition %>%
dm::pull_tbl(data) %>%
dplyr::pull(id_measurement) %>%
tail(1)
) %>%
purrr::map(function(.x) {
if(length(.x) == 0) {
0L
} else {
.x
}
})
Create directories
dir_name <- "d22"
dir_source <- "../raw_data/data/d22"
dir_target <- paste0("../derived_data/", id_last$id_dataset + 1L)
if(!dir.exists(dir_target)) {
dir.create(dir_target)
}
datasets <-
tibble::tibble(
id_dataset = id_last$id_dataset + 1L
)
citations_to_datasets <-
dplyr::bind_rows(
db_template_tables$citations_to_datasets,
tibble::tibble(
id_dataset = datasets$id_dataset,
id_citation = c("Scheffer.2000")
)
)
## mass remaining
samples3 <-
readODS::read_ods(paste0(dir_source, "/derived/Scheffer.2000-Tab2.ods")) %>%
dplyr::rename(
mesh_size_absolute = "mesh_size",
sample_treatment = "treatment",
comments_samples = "comment_samples",
sample_depth_upper = "sampling_depth_upper",
sample_depth_lower = "sampling_depth_lower"
) %>%
dplyr::mutate(
sample_treatment = "control",
id_dataset = datasets$id_dataset[[1]],
mass_relative_mass = mass_remaining/100,
mass_absolute = NA_real_,
mass_remaining_error = mass_remaining_error/100,
sampling_longitude =
sampling_longitude %>%
sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
as.numeric(),
sampling_latitude =
sampling_latitude %>%
sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
as.numeric(),
incubation_duration =
incubation_duration %>%
lubridate::dmonths() %>%
lubridate::time_length(unit = "days"),
temperature = 11.4 + 273.15,
water_table_depth = 5,
incubation_environment = "peat",
is_incubated = TRUE,
experimental_design = as.character(as.numeric(as.factor(site_label)))
)
# initial mass
samples2 <-
samples3 %>%
dplyr::filter(! duplicated(paste0(site_label, "_", taxon_rank_value, "_", taxon_organ))) %>%
dplyr::mutate(
mass_relative_mass = 1.0,
mass_remaining_error = 0.0,
incubation_duration = 0.0,
sampling_date = as.Date("1994-10-01"),
sampling_year = lubridate::year(sampling_date),
sampling_month = lubridate::month(sampling_date)
)
# litter collection
samples1 <-
samples2 %>%
dplyr::filter(! duplicated(paste0(taxon_rank_value, "_", taxon_organ))) %>%
dplyr::mutate(
id_sample = seq_len(nrow(.)) + id_last$id_sample,
id_sample_origin = id_sample,
id_sample_parent = id_sample,
id_sample_incubation_start = NA_integer_,
sample_treatment = "treatment",
experimental_design = NA_character_,
is_incubated = FALSE,
incubation_environment = NA_character_,
site_label = NA_character_,
sampling_longitude = NA_real_,
sampling_latitude = NA_real_,
sampling_date = NA,
sampling_year = NA_real_,
sampling_month = NA_real_,
sample_depth_upper = NA_real_,
sample_depth_lower = NA_real_,
comments_samples = 'from @Scheffer.2001: "Cuttings of Carex diandra and Carex lasiocarpa were grown in sand culture in a greenhouse. Nutrient solution was supplied weekly. Nitrogen and phosphorus were supplied as ammonium nitrate and potassium phosphate, respectively, in an atomic mass ratio of 25 (N) to 1 (P) with a total supply during the growing period of around 7.5 g N m$^{-2}$ yr$^{-1}$. This rate of supply was comparable to N and P availability at these fen sites and was limiting for plant growth. Other nutrients were given in non-limiting amounts. After 11 months the plants were placed in a split-root design (cf. Aerts 1990) in which the root systems of the plants were equally partitioned over two adjacent containers filled with sand. One container was kept dry in order to kill one part of the roots gradually while the other container was still supplied with water. This design allows the plant to resorb nutrients from the dying roots to the remaining part of the plant. Moreover, by this method recently senesced root litter can be collected. After 3.5 months the dead roots were sorted and cleaned. The below-ground system of Carex lasiocarpa was separated into rhizomes and roots, whereas C. diandra only had roots."'
)
# add missing ids
samples2 <-
dplyr::bind_rows(
samples2 %>%
dplyr::mutate(
type = "samples2"
),
samples3 %>%
dplyr::mutate(
type = "samples3"
)
)
samples2 <-
samples2 %>%
dplyr::mutate(
id_sample = seq_len(nrow(.)) + max(samples1$id_sample),
id_sample_origin =
dplyr::left_join(
samples2 %>% dplyr::select(taxon_rank_value, taxon_organ),
samples1 %>% dplyr::select(taxon_rank_value, taxon_organ, id_sample),
by = c("taxon_rank_value", "taxon_organ")
) %>%
dplyr::pull(id_sample),
id_sample_incubation_start =
purrr::map_int(seq_len(nrow(.)), function(i) {
index <- paste0(taxon_rank_value, "_", site_label, "_", taxon_organ) == paste0(taxon_rank_value, "_", site_label, "_", taxon_organ)[[i]] & experimental_design == experimental_design[[i]] & incubation_duration == 0.0
id_sample[index]
}),
id_sample_parent =
purrr::map_int(seq_len(nrow(.)), function(i) {
index <- paste0(taxon_rank_value, "_", site_label, "_", taxon_organ) == paste0(taxon_rank_value, "_", site_label, "_", taxon_organ)[[i]] & experimental_design == experimental_design[[i]] & incubation_duration < incubation_duration[[i]]
if(! any(index)) {
id_sample_origin[[i]]
} else {
target_incubation_duration <- max(incubation_duration[index])
index <- index & incubation_duration == target_incubation_duration
id_sample[index]
}
})
) %>%
dplyr::left_join(
dplyr::bind_rows(
readODS::read_ods(paste0(dir_source, "/derived/Scheffer.2000-Tab3.ods"))
) %>%
dplyr::rename(
N_relative_mass2 = "N", #---note: placeholder name for N content relative to mass of the same sample (not relative to initial mass)
P_relative_mass2 = "P",
N_relative_mass2_error = "N_error",
P_relative_mass2_error = "P_error",
P_relative_mass2_error_type = "P_error_type",
P_relative_mass2_sample_size = "P_sample_size",
N_relative_mass2_error_type = "N_error_type",
N_relative_mass2_sample_size = "N_sample_size"
) %>%
dplyr::mutate(
dplyr::across(dplyr::any_of(c("N_relative_mass2", "N_relative_mass2_error", "P_relative_mass2", "P_relative_mass2_error")), magrittr::divide_by, 100),
N_absolute = NA_real_,
P_absolute = NA_real_
) %>%
dplyr::select(dplyr::all_of(c("site_label", "taxon_rank_value", "taxon_organ", "sampling_year", "sampling_month")) | dplyr::starts_with(c("N_", "P_"), ignore.case = FALSE)),
by = c("site_label", "taxon_rank_value", "taxon_organ", "sampling_year", "sampling_month")
) %>%
dplyr::left_join(
readODS::read_ods(paste0(dir_source, "/derived/Scheffer.2000-Tab1.ods")) %>%
dplyr::rename(
N_relative_mass = "N",
P_relative_mass = "P",
C_to_N = "CN"
) %>%
dplyr::mutate(
dplyr::across(dplyr::any_of(c("N_relative_mass", "N_error", "P_relative_mass", "P_error")), magrittr::divide_by, 1000),
C_relative_mass = NA_real_,
C_absolute = NA_real_,
sampling_year = 1994,
sampling_month = 10
) %>%
dplyr::select(dplyr::all_of(c("taxon_rank_value", "taxon_organ", "sampling_year", "sampling_month")) | dplyr::starts_with(c("N_", "C_", "P_", "CN_"), ignore.case = FALSE)),
by = c("taxon_rank_value", "taxon_organ", "sampling_year", "sampling_month")
)
## combine
samples <-
dplyr::bind_rows(
db_template_tables$samples,
samples1 %>%
dplyr::mutate(
type = "samples1"
),
samples2
) %>%
dplyr::mutate(
site_name = site_label
)
samples_to_samples <-
samples %>%
dplyr::filter(! id_sample %in% id_sample_origin) %>%
dplyr::mutate(
transition_description =
dplyr::case_when(
type %in% c("samples2") ~ "translocate",
type %in% c("samples3") ~ "wait",
TRUE ~ NA_character_
)
) %>%
dplyr::select(id_sample_parent, id_sample, transition_description) %>%
dplyr::rename(
id_sample_child = "id_sample"
)
d2 <-
samples2 %>%
tidyr::pivot_longer(
cols = dplyr::all_of(c("mass_absolute", "mass_relative_mass", "mesh_size_absolute", "C_to_N", "N_relative_mass", "C_relative_mass", "P_relative_mass", "C_absolute", "N_absolute", "P_absolute", "N_relative_mass2", "P_relative_mass2", "temperature", "water_table_depth")),
names_to = "attribute_name",
values_to = "value"
) %>%
dplyr::mutate(
id_measurement = seq_len(nrow(.)) + id_last$id_measurement,
id_measurement_numerator =
purrr::map_int(seq_len(nrow(.)), function(i) {
switch(
attribute_name[[i]],
"C_to_N" = {
id_measurement[id_sample == id_sample[[i]] & attribute_name == "C_relative_mass"]
},
"P_relative_mass" = ,
"N_relative_mass" = ,
"P_relative_mass2" = ,
"N_relative_mass2" = ,
"C_relative_mass" = {
id_measurement[id_sample == id_sample[[i]] & attribute_name == stringr::str_replace(attribute_name[[i]], "_relative_mass2?", "_absolute")]
},
"mass_relative_mass" = {
id_measurement[id_sample == id_sample[[i]] & attribute_name == "mass_absolute"]
},
NA_integer_
)
}),
id_measurement_denominator =
purrr::map_int(seq_len(nrow(.)), function(i) {
switch(
attribute_name[[i]],
"C_to_N" = {
id_measurement[id_sample == id_sample[[i]] & attribute_name == "N_relative_mass"]
},
"P_relative_mass" = ,
"N_relative_mass" = ,
"C_relative_mass" = {
id_measurement[id_sample == id_sample[[i]] & attribute_name == "mass_absolute"]
},
"P_relative_mass2" = ,
"N_relative_mass2" = {
id_measurement[id_sample == id_sample_incubation_start[[i]] & attribute_name == stringr::str_replace(attribute_name[[i]], "_relative_mass2?", "_absolute")]
},
"mass_relative_mass" = {
id_measurement[id_sample == id_sample_incubation_start[[i]] & attribute_name == "mass_absolute"]
},
NA_integer_
)
}),
value_type =
dplyr::case_when(
attribute_name == "mesh_size_absolute" ~ "point",
TRUE ~ "mean"
),
error_type =
dplyr::case_when(
attribute_name %in% c("mesh_size_absolute", "mass_absolute", "C_absolute", "N_absolute", "P_absolute", "temperature", "water_table_depth") | is.na(value) ~ NA_character_,
TRUE ~ "se"
),
comments_measurements =
dplyr::case_when(
attribute_name == "water_table_depth" ~ "`value`: Guessed from description that the water table depth is always near the surface.",
attribute_name == "temperature" ~ "`value`: Average value of measurements throughout the decomposition experiment.",
TRUE ~ NA_character_
)
)
d2_sample_size <-
samples2 %>%
tidyr::pivot_longer(
cols = dplyr::ends_with("_sample_size"),
names_to = "attribute_name",
values_to = "sample_size"
) %>%
dplyr::mutate(
attribute_name =
attribute_name %>%
stringr::str_remove(pattern = "_sample_size$"),
attribute_name =
dplyr::case_when(
attribute_name == "mass_remaining" ~ "mass_relative_mass",
attribute_name == "N" ~ "N_relative_mass",
attribute_name == "P" ~ "P_relative_mass",
attribute_name == "CN" ~ "C_to_N",
TRUE ~ attribute_name
)
) %>%
dplyr::select(id_sample, attribute_name, sample_size)
d2_error <-
samples2 %>%
tidyr::pivot_longer(
cols = dplyr::ends_with(c("_error")),
names_to = "attribute_name",
values_to = "error"
) %>%
dplyr::mutate(
attribute_name =
attribute_name %>%
stringr::str_remove(pattern = "_error$"),
attribute_name =
dplyr::case_when(
attribute_name == "mass_remaining" ~ "mass_relative_mass",
attribute_name == "N" ~ "N_relative_mass",
attribute_name == "P" ~ "P_relative_mass",
attribute_name == "CN" ~ "C_to_N",
TRUE ~ attribute_name
)
) %>%
dplyr::select(id_sample, attribute_name, error)
d2 <-
d2 %>%
dplyr::mutate(
error =
dplyr::left_join(d2, d2_error, by = c("id_sample", "attribute_name")) %>%
dplyr::pull(error),
sample_size =
dplyr::left_join(d2, d2_sample_size, by = c("id_sample", "attribute_name")) %>%
dplyr::pull(sample_size)
)
# rename attributes
d2 <-
d2 %>%
dplyr::mutate(
attribute_name =
dplyr::case_when(
attribute_name %in% c("N_relative_mass2", "P_relative_mass2") ~ stringr::str_remove(attribute_name, pattern = "2$"),
TRUE ~ attribute_name
)
)
# combine
d <-
dplyr::bind_rows(
db_template_tables$data,
d2
) %>%
dplyr::select(dplyr::all_of(colnames(db_template_tables$data)))
experimental_design_format <-
tibble::tibble(
id_dataset = datasets$id_dataset,
file = paste0(id_last$id_dataset + 1L, "/experimental_design_format.csv"),
experimental_design_description = "`site_name`: Name of the site."
)
# csv file to export
experimental_design_format2 <-
samples %>%
dplyr::filter(! is.na(experimental_design)) %>%
dplyr::filter(! duplicated(experimental_design)) %>%
dplyr::select(experimental_design, site_name)
# export
write.csv(experimental_design_format2, paste0(dir_target, "/experimental_design_format.csv"), row.names = FALSE)
# list all tables
dm_insert_in <-
list(
datasets =
datasets %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$datasets))),
samples =
samples %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$samples))),
data =
d %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$data))),
samples_to_samples =
samples_to_samples %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$samples_to_samples))),
citations_to_datasets =
citations_to_datasets %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$citations_to_datasets))),
experimental_design_format =
experimental_design_format %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$experimental_design_format)))
)
# check whether all column names as present in table attributes
all_column_names <-
purrr::map(dm_insert_in, colnames) %>%
unlist() %>%
unique()
if(! all(all_column_names %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))) {
cond <- purrr::map_lgl(all_column_names, function(.x) ! .x %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))
RMariaDB::dbDisconnect(con)
stop(paste0("New `attribute_name`s discovered: ", paste(all_column_names[cond], collapse = ", ")))
}
all_data_attributes <- unique(dm_insert_in$data$attribute_name)
if(! all(all_data_attributes %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))) {
RMariaDB::dbDisconnect(con)
cond <- purrr::map_lgl(all_data_attributes, function(.x) ! .x %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))
stop(paste0("New `attribute_name`s discovered: ", paste(all_data_attributes[cond], collapse = ", ")))
}
# filter empty tables
dm_insert_in_check <-
dm_insert_in[purrr::map_lgl(dm_insert_in, function(x) nrow(x) > 0)] %>%
dm::as_dm() %>%
dp_dm_add_keys(dm_dpeatdecomposition)
# copy into dm_pmird
for(i in seq_along(dm_insert_in)) {
RMariaDB::dbAppendTable(con, name = names(dm_insert_in)[[i]], value = dm_insert_in[[i]])
}
RMariaDB::dbDisconnect(con)