# packages
library(metaDigitise)
library(magrittr)
library(tibble)
library(dplyr)
library(lubridate)
library(dpeatdecomposition)
library(dm)
library(RMariaDB)
Connect to database
# connect to database
con <-
RMariaDB::dbConnect(
drv = RMariaDB::MariaDB(),
dbname = "dpeatdecomposition",
default.file = "~/my.cnf"
)
# get database as dm object
dm_dpeatdecomposition <-
dpeatdecomposition::dp_get_dm(con, learn_keys = TRUE)
Get most current IDs
id_last <-
list(
id_dataset =
dm_dpeatdecomposition %>%
dm::pull_tbl(datasets) %>%
dplyr::pull(id_dataset) %>%
tail(1),
id_sample =
dm_dpeatdecomposition %>%
dm::pull_tbl(samples) %>%
dplyr::pull(id_sample) %>%
tail(1),
id_measurement =
dm_dpeatdecomposition %>%
dm::pull_tbl(data) %>%
dplyr::pull(id_measurement) %>%
tail(1)
) %>%
purrr::map(function(.x) {
if(length(.x) == 0) {
0L
} else {
.x
}
})
Create directories
dir_name <- "d43"
dir_source <- "../raw_data/data/d43"
dir_target <- paste0("../derived_data/", id_last$id_dataset + 1L)
if(!dir.exists(dir_target)) {
dir.create(dir_target)
}
datasets <-
tibble::tibble(
id_dataset = id_last$id_dataset + 1L
)
citations_to_datasets <-
dplyr::bind_rows(
db_template_tables$citations_to_datasets,
tibble::tibble(
id_dataset = datasets$id_dataset,
id_citation = c("Strakova.2010", "Strakova.2012")
)
)
# mass remaining cellulose
samples3_1 <-
dplyr::bind_rows(
readRDS(paste0(dir_source, "/raw/caldat/Strakova.2012-FigA4a"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Strakova.2012-FigA4b"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Strakova.2012-FigA4c"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Strakova.2012-FigA4d"))$processed_data %>%
dplyr::mutate(
id =
id %>% stringr::str_replace("treatmentltd", "treatmentcontrol")
),
readRDS(paste0(dir_source, "/raw/caldat/Strakova.2012-FigA4e"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Strakova.2012-FigA4f"))$processed_data %>%
dplyr::mutate(
id =
id %>% stringr::str_replace("treatmentstd", "treatmentltd")
)
) %>%
dplyr::select(id, mean, error) %>%
dplyr::rename(
mass_relative_mass = "mean",
mass_relative_mass_error = "error"
) %>%
dplyr::mutate(
id_dataset = datasets$id_dataset[[1]],
is_incubated = TRUE,
incubation_environment = "peat",
mass_absolute = NA_real_,
sample_type = "cellulose",
sample_type2 = "cellulose strips",
incubation_plot_type =
dplyr::case_when(
stringr::str_detect(id, "incubationplottypeof") ~ "oligotrophic_fen", #---note: It is unclear whether the data refer to samples incubated only in the oligotrophic fen, only in the mesotrophic fen, or in both. I assume here that they were incubated in the oligotrophic fen to keep the data structure simpler.
stringr::str_detect(id, "incubationplottypeob") ~ "bog"
),
sample_treatment =
dplyr::case_when(
stringr::str_detect(id, "treatmentcontrol") ~ "control",
stringr::str_detect(id, "treatmentstd") ~ "short_term_drainage",
stringr::str_detect(id, "treatmentltd") ~ "long_term_drainage"
),
sample_depth_upper =
id %>%
stringr::str_extract(pattern = "\\d+\\.") %>%
stringr::str_remove(pattern = "\\.$") %>%
as.numeric(),
sample_depth_lower =
id %>%
stringr::str_extract(pattern = "\\.\\d+") %>%
stringr::str_remove(pattern = "^\\.") %>%
as.numeric(),
sample_depth_upper =
dplyr::case_when(
stringr::str_detect(id, "wheremoss") ~ 2, #---note: I assume this as depth under the surface of the moss carpet
TRUE ~ sample_depth_upper
),
sample_depth_lower =
dplyr::case_when(
stringr::str_detect(id, "wheremoss") ~ 2, #---note: I assume this as depth under the surface of the moss carpet
TRUE ~ sample_depth_upper
),
sample_microhabitat =
dplyr::case_when(
stringr::str_detect(id, "wherehollow") ~ "hollow",
TRUE ~ NA_character_
),
incubation_duration =
id %>%
stringr::str_extract("\\d{1}$") %>%
as.numeric() %>%
lubridate::dyears() %>%
lubridate::time_length(unit = "days"),
mesh_size_absolute = 1, #---note: assumed based on mesh size for other litter bags
site_name = "Lakkasuo",
sampling_longitude =
"24°19'E" %>%
sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
as.numeric(),
sampling_latitude =
"61°48'N" %>%
sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
as.numeric(),
mass_relative_mass_error_type = "se", #---note: assumed
mass_relative_mass_sample_size = NA,
dplyr::across(dplyr::any_of(c("mass_relative_mass", "mass_relative_mass_error")), function(.x) .x/100),
sampling_date =
as.Date(as.Date("2004-10-01") + lubridate::ddays(incubation_duration)), #---note: assumed/guessed (better than nothing)
sampling_year = lubridate::year(sampling_date),
sampling_month = lubridate::month(sampling_date),
sampling_day = NA,
comments_samples = "Coordinates are only the approximate location of the study site, but not sampling points. incubation_plot_type == oligotrophic_fen, mesh_size, sampling_year, sampling_month ar assumed.",
experimental_design =
paste0(
as.numeric(as.factor(site_name)), "_",
as.numeric(factor(incubation_plot_type, levels = c("bog", "mesotrophic_fen", "oligotrophic_fen"))), "_",
as.numeric(as.factor(sample_treatment))
)
) %>%
dplyr::select(-id) %>%
dplyr::filter(!duplicated(paste0(experimental_design, "_", sample_microhabitat, "_", sample_depth_upper, "_", incubation_duration)))
# initial mass cellulose
samples2_1 <-
samples3_1 %>%
dplyr::filter(incubation_duration == 365.25) %>%
dplyr::mutate(
incubation_duration = 0.0,
mass_relative_mass = 1.0,
mass_relative_mass_error = 0.0
)
# mass remaining litter
samples2_2 <-
readODS::read_ods(paste0(dir_source, "/derived/Strakova.2012-TabA1.ods")) %>%
dplyr::mutate(
id_dataset = datasets$id_dataset[[1]],
is_incubated = TRUE,
incubation_environment = "peat",
mass_absolute = NA_real_,
sampling_longitude =
sampling_longitude %>%
sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
as.numeric(),
sampling_latitude =
sampling_latitude %>%
sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
as.numeric(),
mass_relative_mass_error =
mass_loss %>%
stringr::str_extract(pattern = "\\(\\d+\\.?\\d*\\)") %>%
stringr::str_remove_all(pattern = "(\\(|\\))") %>%
as.numeric(),
mass_relative_mass =
mass_loss %>%
stringr::str_extract(pattern = "^\\d+\\.?\\d*") %>%
as.numeric(),
mass_relative_mass = (100 - mass_relative_mass)/100,
mass_relative_mass_error = mass_relative_mass_error/100,
index =
litter_type_label %>%
stringr::str_extract(pattern = "(a|b|c){1,1}$"),
sampling_year =
dplyr::case_when(
index == "a" ~ 2005,
index == "b" ~ 2005,
index == "c" ~ 2006,
TRUE ~ 2004
) %>%
magrittr::add(incubation_duration),
sampling_month = 10,
sampling_day = NA_real_,
experimental_design =
paste0(
as.numeric(as.factor(site_name)), "_",
as.numeric(factor(incubation_plot_type, levels = c("bog", "mesotrophic_fen", "oligotrophic_fen"))), "_",
as.numeric(as.factor(treatment))
),
incubation_duration =
incubation_duration %>%
lubridate::dyears() %>%
lubridate::time_length(unit = "days"),
comments_samples = "Coordinates are only the approximate location of the study site, but not sampling points."
) %>%
dplyr::rename(
sample_treatment = "treatment",
mesh_size_absolute = "mesh_size"
) %>%
dplyr::select(-index) %>%
dplyr::mutate(
type =
dplyr::case_when(
incubation_duration == 0.0 ~ "samples2",
TRUE ~ "samples3"
)
)
# litter collection
samples1_1 <-
samples2_1 %>%
dplyr::slice(1) %>%
dplyr::mutate(
id_sample = seq_len(nrow(.)) + id_last$id_sample,
id_sample_origin = id_sample,
id_sample_parent = id_sample,
id_sample_incubation_start = NA_integer_,
site_label = NA_character_,
site_name = NA_character_,
sampling_longitude = NA_real_,
sampling_latitude = NA_real_,
sample_treatment = "control",
experimental_design = NA_character_,
is_incubated = FALSE,
incubation_environment = NA_character_,
sample_depth_upper = NA_real_,
sample_depth_lower = NA_real_,
sampling_date = NA_real_,
sampling_year = NA_real_,
sampling_month = NA_real_,
sampling_day = NA_real_
)
samples1_2 <-
dplyr::bind_rows(
samples2_2 %>%
dplyr::filter(incubation_duration == 0 & taxon_rank_value %in% c("Betula nana", "Erophporum vaginatum", "Pinus sylvestris") & sample_depth_upper %in% c(NA_real_, 0)),
samples2_2 %>%
dplyr::filter(! duplicated(paste0(taxon_rank_value, "_", taxon_organ, "_", sample_diameter_lower, "_", sample_diameter_upper)) & ! taxon_rank_value %in% c("Betula nana", "Erophporum vaginatum", "Pinus sylvestris")) %>%
dplyr::mutate(
sample_treatment = NA_character_,
incubation_plot_type = NA_character_,
experimental_design = NA_character_
)
) %>%
dplyr::mutate(
id_sample = seq_len(nrow(.)) + max(samples1_1$id_sample),
id_sample_origin = id_sample,
id_sample_parent = id_sample,
id_sample_incubation_start = NA_integer_,
is_incubated = FALSE,
incubation_environment = NA_character_,
sample_depth_upper =
dplyr::case_when(
stringr::str_detect(taxon_rank_value, "Sphagnum") ~ 3,
TRUE ~ NA_real_
),
sample_depth_lower =
dplyr::case_when(
stringr::str_detect(taxon_rank_value, "Sphagnum") ~ 5,
TRUE ~ NA_real_
),
sampling_date = NA_real_,
sampling_year = NA_real_,
sampling_month = NA_real_,
sampling_day = NA_real_
)
# add missing ids
samples2_1 <-
dplyr::bind_rows(
samples2_1 %>%
dplyr::mutate(
type = "samples2"
),
samples3_1 %>%
dplyr::mutate(
type = "samples3"
)
)
samples2_1 <-
samples2_1 %>%
dplyr::mutate(
id_sample = seq_len(nrow(.)) + max(samples1_2$id_sample),
id_sample_origin = samples1_1$id_sample,
id_sample_incubation_start =
purrr::map_int(seq_len(nrow(.)), function(i) {
index <- paste0(site_name, "_", sample_depth_upper, "_", sample_microhabitat) == paste0(site_name, "_", sample_depth_upper, "_", sample_microhabitat)[[i]] & experimental_design == experimental_design[[i]] & incubation_duration == 0.0
id_sample[index]
}),
id_sample_parent =
purrr::map_int(seq_len(nrow(.)), function(i) {
index <- paste0(site_name, "_", sample_depth_upper, "_", sample_microhabitat) == paste0(site_name, "_", sample_depth_upper, "_", sample_microhabitat)[[i]] & experimental_design == experimental_design[[i]] & incubation_duration < incubation_duration[[i]]
if(! any(index)) {
id_sample_origin[[i]]
} else {
target_incubation_duration <- max(incubation_duration[index])
index <- index & incubation_duration == target_incubation_duration
id_sample[index]
}
})
)
samples2_2 <-
samples2_2 %>%
dplyr::mutate(
id_sample = seq_len(nrow(.)) + max(samples2_1$id_sample),
id_sample_origin =
purrr::map_int(seq_len(nrow(.)), function(i) {
if(! taxon_rank_value[[i]] %in% c("Betula nana", "Erophporum vaginatum", "Pinus sylvestris")) {
dplyr::left_join(
samples2_2 %>%
dplyr::slice(i) %>%
dplyr::select(site_name, taxon_rank_value, taxon_organ, sample_diameter_lower, sample_diameter_upper),
samples1_2 %>%
dplyr::select(site_name, taxon_rank_value, taxon_organ, sample_diameter_lower, sample_diameter_upper, id_sample),
by = c("site_name", "taxon_rank_value", "taxon_organ", "sample_diameter_lower", "sample_diameter_upper")
) %>%
dplyr::pull(id_sample)
} else {
dplyr::left_join(
samples2_2 %>%
dplyr::slice(i) %>%
dplyr::select(site_name, taxon_rank_value, taxon_organ, sample_diameter_lower, sample_diameter_upper, incubation_plot_type, sample_treatment),
samples1_2 %>%
dplyr::select(site_name, taxon_rank_value, taxon_organ, sample_diameter_lower, sample_diameter_upper, incubation_plot_type, sample_treatment, id_sample),
by = c("site_name", "taxon_rank_value", "taxon_organ", "sample_diameter_lower", "sample_diameter_upper", "incubation_plot_type", "sample_treatment")
) %>%
dplyr::pull(id_sample)
}
}),
id_sample_incubation_start =
purrr::map_int(seq_len(nrow(.)), function(i) {
index <- paste0(taxon_rank_value, "_", site_name, "_", taxon_organ, "_", sample_diameter_lower, "_", sample_diameter_upper, "_", sample_depth_upper, "_", sample_depth_lower) == paste0(taxon_rank_value, "_", site_name, "_", taxon_organ, "_", sample_diameter_lower, "_", sample_diameter_upper, "_", sample_depth_upper, "_", sample_depth_lower)[[i]] & experimental_design == experimental_design[[i]] & incubation_duration == 0.0
id_sample[index]
}),
id_sample_parent =
purrr::map_int(seq_len(nrow(.)), function(i) {
index <- paste0(taxon_rank_value, "_", site_name, "_", taxon_organ, "_", sample_diameter_lower, "_", sample_diameter_upper, "_", sample_depth_upper, "_", sample_depth_lower) == paste0(taxon_rank_value, "_", site_name, "_", taxon_organ, "_", sample_diameter_lower, "_", sample_diameter_upper, "_", sample_depth_upper, "_", sample_depth_lower)[[i]] & experimental_design == experimental_design[[i]] & incubation_duration < incubation_duration[[i]]
if(! any(index)) {
id_sample_origin[[i]]
} else {
target_incubation_duration <- max(incubation_duration[index])
index <- index & incubation_duration == target_incubation_duration
id_sample[index]
}
})
)
# litter chemistry from Strakova.2010
d43_litter_chemistry <-
readODS::read_ods(paste0(dir_source, "/derived/Strakova.2010-TabA1.ods")) %>%
dplyr::mutate(
dplyr::across(11:48, function(.x) {
.x %>%
stringr::str_extract(pattern = "\\(\\d+\\.?\\d*\\)") %>%
stringr::str_remove_all(pattern = "(\\(|\\))") %>%
as.numeric()
}, .names = "{.col}_error"),
dplyr::across(11:48, function(.x) {
.x %>%
stringr::str_extract(pattern = "^\\d+\\.?\\d*") %>%
as.numeric()
})
) %>%
dplyr::mutate(
dplyr::across(c(11:48, 50:ncol(.))[! colnames(.)[c(11:48, 50:ncol(.))] %in% c("C_to_N", "C_to_P", "Klason_ligninN", "C_to_N_error", "C_to_P_error", "Klason_ligninN_error")], function(.x) {
.x/1000
}),
dplyr::across(11:48, function(.x) {
"se"
}, .names = "{.col}_error_type"),
dplyr::across(11:48, function(.x) {
sample_size
}, .names = "{.col}_sample_size"),
dplyr::across(11:48, function(.x) {
NA_real_
}, .names = "{.col}_mass_absolute")
) %>%
dplyr::select(-sample_size) %>%
dplyr::filter(! taxon_rank_value %in% c("Betula nana", "Erophporum vaginatum", "Pinus sylvestris")) %>% #---note: since we do not know from which of the incubation_plot_type these were collected
dplyr::select(! dplyr::any_of(c("taxon_rank_name", "site_name", "sampling_longitude", "sampling_latitude", "comments_samples"))) %>%
dplyr::select(! dplyr::starts_with("Klason_ligninN")) %>%
dplyr::mutate(
incubation_duration = 0.0
) %>%
dplyr::rename_with(
.cols = dplyr::starts_with(PeriodicTable:::periodicTable$symb, ignore.case = FALSE) & dplyr::ends_with("_relative_mass_mass_absolute", ignore.case = FALSE) & ! dplyr::any_of(c("C_to_N", "C_to_P")),
.fn = function(.x) {
stringr::str_replace(.x, pattern = "_relative_mass_mass_absolute$", replacement = "_absolute")
}
) %>%
dplyr::rename_with(
.cols = ! dplyr::starts_with(PeriodicTable:::periodicTable$symb, ignore.case = FALSE) & dplyr::ends_with("_relative_mass_mass_absolute", ignore.case = FALSE),
.fn = function(.x) {
stringr::str_replace(.x, pattern = "_relative_mass_mass_absolute$", replacement = "_absolute")
}
)
samples2_2 <-
dplyr::left_join(
samples2_2,
d43_litter_chemistry,
by = c("incubation_duration", "taxon_rank_value", "taxon_organ", "sample_diameter_lower", "sample_diameter_upper")
)
# wtd
samples4 <-
dplyr::bind_rows(
readRDS(paste0(dir_source, "/raw/caldat/Strakova.2012-Fig1a"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Strakova.2012-Fig1b"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Strakova.2012-Fig1c"))$processed_data
) %>%
dplyr::select(id, x, y) %>%
dplyr::rename(
water_table_depth = "y"
) %>%
dplyr::mutate(
sample_type = "peat",
sample_depth_upper = 0,
sample_depth_lower = 0,
incubation_plot_type =
dplyr::case_when(
stringr::str_detect(id, "incubationplottypeof") ~ "oligotrophic_fen",
stringr::str_detect(id, "incubationplottypemf") ~ "mesootrophic_fen",
stringr::str_detect(id, "incubationplottypeob") ~ "bog"
),
sample_treatment =
dplyr::case_when(
stringr::str_detect(id, "treatmentcontrol") ~ "control",
stringr::str_detect(id, "treatmentstd") ~ "short_term_drainage",
stringr::str_detect(id, "treatmentltd") ~ "long_term_drainage"
),
site_name = "Lakkasuo",
sampling_longitude =
"24°19'E" %>%
sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
as.numeric(),
sampling_latitude =
"61°48'N" %>%
sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
as.numeric(),
water_table_depth = water_table_depth * (-1),
sampling_date =
as.Date(as.Date("2005-04-01") + lubridate::dmonths(x)),
sampling_year = lubridate::year(sampling_date),
sampling_month = lubridate::month(sampling_date),
sampling_day = NA,
comments_samples = "Coordinates are only the approximate location of the study site, but not sampling points. `sampling_month` is approximate.",
id_dataset = datasets$id_dataset,
is_incubated = FALSE,
incubation_duration = 0.0,
id_sample = seq_len(nrow(.)) + max(samples2_2$id_sample),
id_sample_parent = id_sample,
id_sample_origin = id_sample,
experimental_design =
paste0(
as.numeric(as.factor(site_name)), "_",
as.numeric(factor(incubation_plot_type, levels = c("bog", "mesotrophic_fen", "oligotrophic_fen"))), "_",
as.numeric(as.factor(sample_treatment))
)
) %>%
dplyr::select(-id, -sampling_date, -x)
# peat chemistry
samples5 <-
readODS::read_ods(paste0(dir_source, "/derived/Strakova.2010-Tab1.ods")) %>%
dplyr::mutate(
dplyr::across(13:22, function(.x) {
.x %>%
stringr::str_extract(pattern = "\\(\\d+\\.?\\d*\\)") %>%
stringr::str_remove_all(pattern = "(\\(|\\))") %>%
as.numeric()
}, .names = "{.col}_error"),
dplyr::across(13:22, function(.x) {
.x %>%
stringr::str_extract(pattern = "^\\d+\\.?\\d*") %>%
as.numeric()
})
) %>%
dplyr::mutate(
dplyr::across(c(13:22, 24:ncol(.))[! colnames(.)[c(13:22, 24:ncol(.))] %in% c("pH", "pH_error")], function(.x) {
.x/1000
}),
dplyr::across(13:22, function(.x) {
"se"
}, .names = "{.col}_error_type"),
dplyr::across(13:22, function(.x) {
3L
}, .names = "{.col}_sample_size"),
dplyr::across(13:21, function(.x) {
NA_real_
}, .names = "{.col}_absolute"),
sampling_longitude =
sampling_longitude %>%
sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
as.numeric(),
sampling_latitude =
sampling_latitude %>%
sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
as.numeric(),
id_dataset = datasets$id_dataset,
is_incubated = FALSE,
incubation_duration = 0.0,
id_sample = seq_len(nrow(.)) + max(samples4$id_sample),
id_sample_parent = id_sample,
id_sample_origin = id_sample,
sample_treatment = treatment,
sample_microhabitat = microhabitat,
experimental_design =
paste0(
as.numeric(as.factor(site_name)), "_",
as.numeric(factor(incubation_plot_type, levels = c("bog", "mesotrophic_fen", "oligotrophic_fen"))), "_",
as.numeric(as.factor(sample_treatment))
)
) %>%
dplyr::rename_with(
.cols = dplyr::ends_with("_relative_mass_absolute", ignore.case = FALSE),
.fn = function(.x) {
stringr::str_replace(.x, pattern = "_relative_mass_absolute$", replacement = "_absolute")
}
)
## combine
samples1 <-
dplyr::bind_rows(
samples1_1,
samples1_2
)
samples2 <-
dplyr::bind_rows(
samples2_1,
samples2_2
)
samples <-
dplyr::bind_rows(
db_template_tables$samples,
samples1 %>%
dplyr::mutate(
type = "samples1"
) %>%
dplyr::select(-sampling_date),
samples2,
samples4 %>%
dplyr::mutate(
type = "samples4"
),
samples5 %>%
dplyr::mutate(
type = "samples4"
)
)
samples_to_samples <-
samples %>%
dplyr::filter(! id_sample %in% id_sample_origin) %>%
dplyr::mutate(
transition_description =
dplyr::case_when(
type %in% c("samples2") ~ "translocate",
type %in% c("samples3") ~ "wait",
TRUE ~ NA_character_
)
) %>%
dplyr::select(id_sample_parent, id_sample, transition_description) %>%
dplyr::rename(
id_sample_child = "id_sample"
)
d2 <-
samples2 %>%
tidyr::pivot_longer(
cols = dplyr::all_of(c("mass_absolute", "mass_relative_mass", "mesh_size_absolute", "ash_mass_relative_mass", "N_relative_mass", "P_relative_mass", "Ca_relative_mass", "K_relative_mass", "Mg_relative_mass", "Mn_relative_mass", "C_relative_mass", "C_to_N", "C_to_P", "dichloromethane_extractives_mass_relative_mass", "acetone_extractives_mass_relative_mass", "ethanol_extractives_mass_relative_mass", "water_extractives_mass_relative_mass", "cellulose_mass_relative_mass", "arabinose_mass_relative_mass", "rhamnose_mass_relative_mass", "xylose_mass_relative_mass", "mannose_mass_relative_mass", "galactose_mass_relative_mass", "glucose_mass_relative_mass", "glucuronic_acid_mass_relative_mass", "galacturonic_acid_mass_relative_mass", "holocellulose_mass_relative_mass", "4_hydroxybenzaldehyde_mass_relative_mass", "4_hydroxyacetophenone_mass_relative_mass", "4_hydroxybenzoic_acid_mass_relative_mass", "vanillin_mass_relative_mass", "vanillic_acid_mass_relative_mass", "acetovanillone_mass_relative_mass", "syringe_aldehyde_mass_relative_mass", "acetosyringone_mass_relative_mass", "syringic_acid_mass_relative_mass", "p_coumaric_acid_mass_relative_mass", "ferulic_acid_mass_relative_mass", "Klason_lignin_mass_relative_mass", "soluble_lignin_mass_relative_mass",
"ash_mass_absolute", "N_absolute", "P_absolute", "Ca_absolute", "K_absolute", "Mg_absolute", "Mn_absolute", "C_absolute", "dichloromethane_extractives_mass_absolute", "acetone_extractives_mass_absolute", "ethanol_extractives_mass_absolute", "water_extractives_mass_absolute", "cellulose_mass_absolute", "arabinose_mass_absolute", "rhamnose_mass_absolute", "xylose_mass_absolute", "mannose_mass_absolute", "galactose_mass_absolute", "glucose_mass_absolute", "glucuronic_acid_mass_absolute", "galacturonic_acid_mass_absolute", "holocellulose_mass_absolute", "4_hydroxybenzaldehyde_mass_absolute", "4_hydroxyacetophenone_mass_absolute", "4_hydroxybenzoic_acid_mass_absolute", "vanillin_mass_absolute", "vanillic_acid_mass_absolute", "acetovanillone_mass_absolute", "syringe_aldehyde_mass_absolute", "acetosyringone_mass_absolute", "syringic_acid_mass_absolute", "p_coumaric_acid_mass_absolute", "ferulic_acid_mass_absolute", "Klason_lignin_mass_absolute", "soluble_lignin_mass_absolute")),
names_to = "attribute_name",
values_to = "value"
) %>%
dplyr::mutate(
id_measurement = seq_len(nrow(.)) + id_last$id_measurement,
id_measurement_numerator =
purrr::map_int(seq_len(nrow(.)), function(i) {
if(attribute_name[[i]] == "mass_relative_mass") {
id_measurement[id_sample == id_sample[[i]] & attribute_name == "mass_absolute"]
} else if (stringr::str_detect(attribute_name[[i]], pattern = "_relative_mass$")) {
id_measurement[id_sample == id_sample[[i]] & attribute_name == stringr::str_replace(attribute_name[[i]], "_relative_mass2?", "_absolute")]
} else {
NA_integer_
}
}),
id_measurement_denominator =
purrr::map_int(seq_len(nrow(.)), function(i) {
if(attribute_name[[i]] == "mass_relative_mass") {
id_measurement[id_sample == id_sample_incubation_start[[i]] & attribute_name == "mass_absolute"]
} else if (stringr::str_detect(attribute_name[[i]], pattern = "_relative_mass$")) {
id_measurement[id_sample == id_sample[[i]] & attribute_name == "mass_absolute"]
} else {
NA_integer_
}
}),
value_type =
dplyr::case_when(
attribute_name == "mesh_size_absolute" ~ "point",
TRUE ~ "mean"
)
)
d2_sample_size <-
samples2 %>%
tidyr::pivot_longer(
cols = dplyr::ends_with("_sample_size"),
names_to = "attribute_name",
values_to = "sample_size"
) %>%
dplyr::mutate(
attribute_name =
attribute_name %>%
stringr::str_remove(pattern = "_sample_size$")
) %>%
dplyr::select(id_sample, attribute_name, sample_size)
d2_error <-
samples2 %>%
tidyr::pivot_longer(
cols = dplyr::ends_with(c("_error")),
names_to = "attribute_name",
values_to = "error"
) %>%
dplyr::mutate(
attribute_name =
attribute_name %>%
stringr::str_remove(pattern = "_error$")
) %>%
dplyr::select(id_sample, attribute_name, error)
d2_error_type <-
samples2 %>%
tidyr::pivot_longer(
cols = dplyr::ends_with(c("_error_type")),
names_to = "attribute_name",
values_to = "error_type"
) %>%
dplyr::mutate(
attribute_name =
attribute_name %>%
stringr::str_remove(pattern = "_error_type$")
) %>%
dplyr::select(id_sample, attribute_name, error_type)
d2 <-
d2 %>%
dplyr::mutate(
error =
dplyr::left_join(d2, d2_error, by = c("id_sample", "attribute_name")) %>%
dplyr::pull(error),
error_type =
dplyr::left_join(d2, d2_error_type, by = c("id_sample", "attribute_name")) %>%
dplyr::pull(error_type),
sample_size =
dplyr::left_join(d2, d2_sample_size, by = c("id_sample", "attribute_name")) %>%
dplyr::pull(sample_size)
)
# water table depth
d4 <-
samples4 %>%
tidyr::pivot_longer(
cols = dplyr::all_of(c("water_table_depth")),
names_to = "attribute_name",
values_to = "value"
) %>%
dplyr::mutate(
id_measurement = seq_len(nrow(.)) + max(d2$id_measurement),
value_type = "mean"
)
# peat chemistry
d5 <-
samples5 %>%
dplyr::mutate(
mass_absolute = NA_real_
) %>%
tidyr::pivot_longer(
cols = dplyr::all_of(c("mass_absolute", "C_relative_mass", "N_relative_mass", "P_relative_mass", "K_relative_mass", "Ca_relative_mass", "Mg_relative_mass", "Mn_relative_mass", "Fe_relative_mass", "ash_mass_relative_mass", "pH", "C_absolute", "N_absolute", "P_absolute", "K_absolute", "Ca_absolute", "Mg_absolute", "Mn_absolute", "Fe_absolute", "ash_mass_absolute")),
names_to = "attribute_name",
values_to = "value"
) %>%
dplyr::mutate(
id_measurement = seq_len(nrow(.)) + max(d4$id_measurement),
id_measurement_numerator =
purrr::map_int(seq_len(nrow(.)), function(i) {
if(attribute_name[[i]] == "mass_relative_mass") {
id_measurement[id_sample == id_sample[[i]] & attribute_name == "mass_absolute"]
} else if (stringr::str_detect(attribute_name[[i]], pattern = "_relative_mass$")) {
id_measurement[id_sample == id_sample[[i]] & attribute_name == stringr::str_replace(attribute_name[[i]], "_relative_mass2?", "_absolute")]
} else {
NA_integer_
}
}),
id_measurement_denominator =
purrr::map_int(seq_len(nrow(.)), function(i) {
if(attribute_name[[i]] == "mass_relative_mass") {
id_measurement[id_sample == id_sample_incubation_start[[i]] & attribute_name == "mass_absolute"]
} else if (stringr::str_detect(attribute_name[[i]], pattern = "_relative_mass$")) {
id_measurement[id_sample == id_sample[[i]] & attribute_name == "mass_absolute"]
} else {
NA_integer_
}
}),
value_type =
dplyr::case_when(
attribute_name == "mesh_size_absolute" ~ "point",
TRUE ~ "mean"
)
)
d5_sample_size <-
samples5 %>%
tidyr::pivot_longer(
cols = dplyr::ends_with("_sample_size"),
names_to = "attribute_name",
values_to = "sample_size"
) %>%
dplyr::mutate(
attribute_name =
attribute_name %>%
stringr::str_remove(pattern = "_sample_size$")
) %>%
dplyr::select(id_sample, attribute_name, sample_size)
d5_error <-
samples5 %>%
tidyr::pivot_longer(
cols = dplyr::ends_with(c("_error")),
names_to = "attribute_name",
values_to = "error"
) %>%
dplyr::mutate(
attribute_name =
attribute_name %>%
stringr::str_remove(pattern = "_error$")
) %>%
dplyr::select(id_sample, attribute_name, error)
d5_error_type <-
samples5 %>%
tidyr::pivot_longer(
cols = dplyr::ends_with(c("_error_type")),
names_to = "attribute_name",
values_to = "error_type"
) %>%
dplyr::mutate(
attribute_name =
attribute_name %>%
stringr::str_remove(pattern = "_error_type$")
) %>%
dplyr::select(id_sample, attribute_name, error_type)
d5 <-
d5 %>%
dplyr::mutate(
error =
dplyr::left_join(d5, d5_error, by = c("id_sample", "attribute_name")) %>%
dplyr::pull(error),
error_type =
dplyr::left_join(d5, d5_error_type, by = c("id_sample", "attribute_name")) %>%
dplyr::pull(error_type),
sample_size =
dplyr::left_join(d5, d5_sample_size, by = c("id_sample", "attribute_name")) %>%
dplyr::pull(sample_size)
)
# combine
d <-
dplyr::bind_rows(
db_template_tables$data,
d2,
d4,
d5
) %>%
dplyr::select(dplyr::all_of(colnames(db_template_tables$data)))
experimental_design_format <-
tibble::tibble(
id_dataset = datasets$id_dataset,
experimental_design_format = "site_name//incubation_plot_type//sample_treatment",
file = paste0(id_last$id_dataset + 1L, "/experimental_design_format.csv"),
experimental_design_description = "`site_name`: Name of the site. `incubation_plot_type`: Description of the plot type where the sample was incubated (see the article for details). `sample_treatment`: Description of the treatment (see the article for details)."
)
# csv file to export
experimental_design_format2 <-
samples %>%
dplyr::filter(! is.na(experimental_design)) %>%
dplyr::filter(! duplicated(experimental_design)) %>%
dplyr::select(experimental_design, site_name, incubation_plot_type, sample_treatment)
# export
write.csv(experimental_design_format2, paste0(dir_target, "/experimental_design_format.csv"), row.names = FALSE)
# disconnect and reconnect to avoid database timeout
RMariaDB::dbDisconnect(con)
# connect to database
con <-
RMariaDB::dbConnect(
drv = RMariaDB::MariaDB(),
dbname = "dpeatdecomposition",
default.file = "~/my.cnf"
)
# get database as dm object
dm_dpeatdecomposition <-
dpeatdecomposition::dp_get_dm(con, learn_keys = TRUE)
# list all tables
dm_insert_in <-
list(
datasets =
datasets %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$datasets))),
samples =
samples %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$samples))),
data =
d %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$data))),
samples_to_samples =
samples_to_samples %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$samples_to_samples))),
citations_to_datasets =
citations_to_datasets %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$citations_to_datasets))),
experimental_design_format =
experimental_design_format %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$experimental_design_format)))
)
# check whether all column names as present in table attributes
all_column_names <-
purrr::map(dm_insert_in, colnames) %>%
unlist() %>%
unique()
if(! all(all_column_names %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))) {
cond <- purrr::map_lgl(all_column_names, function(.x) ! .x %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))
RMariaDB::dbDisconnect(con)
stop(paste0("New `attribute_name`s discovered: ", paste(all_column_names[cond], collapse = ", ")))
}
all_data_attributes <- unique(dm_insert_in$data$attribute_name)
if(! all(all_data_attributes %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))) {
cond <- purrr::map_lgl(all_data_attributes, function(.x) ! .x %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))
stop(paste0("New `attribute_name`s discovered: ", paste(all_data_attributes[cond], collapse = ", ")))
RMariaDB::dbDisconnect(con)
}
# filter empty tables
dm_insert_in_check <-
dm_insert_in[purrr::map_lgl(dm_insert_in, function(x) nrow(x) > 0)] %>%
dm::as_dm() %>%
dp_dm_add_keys(dm_dpeatdecomposition)
## checking pk constraints [==============================] 100% in 1s
# copy into dm_pmird
for(i in seq_along(dm_insert_in)) {
RMariaDB::dbAppendTable(con, name = names(dm_insert_in)[[i]], value = dm_insert_in[[i]])
}
RMariaDB::dbDisconnect(con)