# packages
library(metaDigitise)
library(magrittr)
library(tibble)
library(dplyr)
library(lubridate)
library(dpeatdecomposition)
library(dm)
library(RMariaDB)
Connect to database
# connect to database
con <-
RMariaDB::dbConnect(
drv = RMariaDB::MariaDB(),
dbname = "dpeatdecomposition",
default.file = "~/my.cnf"
)
# get database as dm object
dm_dpeatdecomposition <-
dpeatdecomposition::dp_get_dm(con, learn_keys = TRUE)
Get most current IDs
id_last <-
list(
id_dataset =
dm_dpeatdecomposition %>%
dm::pull_tbl(datasets) %>%
dplyr::pull(id_dataset) %>%
tail(1),
id_sample =
dm_dpeatdecomposition %>%
dm::pull_tbl(samples) %>%
dplyr::pull(id_sample) %>%
tail(1),
id_measurement =
dm_dpeatdecomposition %>%
dm::pull_tbl(data) %>%
dplyr::pull(id_measurement) %>%
tail(1)
) %>%
purrr::map(function(.x) {
if(length(.x) == 0) {
0L
} else {
.x
}
})
Create directories
dir_name <- "d52"
dir_source <- "../raw_data/data/d52"
dir_target <- paste0("../derived_data/", id_last$id_dataset + 1L)
if(!dir.exists(dir_target)) {
dir.create(dir_target)
}
datasets <-
tibble::tibble(
id_dataset = id_last$id_dataset + 1L
)
citations_to_datasets <-
dplyr::bind_rows(
db_template_tables$citations_to_datasets,
tibble::tibble(
id_dataset = datasets$id_dataset,
id_citation = c("Bengtsson.2018a", "Bengtsson.2018")
)
)
# litter chemstry
samples1 <-
read.csv(paste0(dir_source, "/raw/bengtsson_etal_2018_LQ.csv")) %>%
dplyr::rename(
sample_label = "sample",
site_name = "site",
sampling_latitude = "latitude",
sampling_longitude = "longitude",
taxon_rank_value = "species.name",
site_type = "vegetation.type",
sample_microhabitat = "microtopographical.position",
holocellulose_mass_relative_mass = "HC_mg_g",
sphagnan_mass_relative_mass = "sphagn_litter_mg_g",
phenolics_tannic_acid_equivalents_mass_relative_mass = "phenolics_TA_mg_g",
phenolics_PHBA_equivalents_mass_relative_mass = "phenolics_PHBA_mg_g",
Klason_lignin_mass_relative_mass = "totalKL_mg_g",
soluble_Klason_lignin_mass_relative_mass = "solubleKL_mg_g",
cation_exchange_capacity_relative_mass = "CEC_meq_g",
N_relative_mass = "N_mg_g",
C_relative_mass = "C_mg_g",
P_relative_mass = "PO4_mg_g" #---question: the readme does not explicitly say whether this is elemental P or phosphate content
) %>%
dplyr::select(! dplyr::any_of(c("species.code", "author.citation", "section", "sphagn_HC_mg_g", "KL_mg_g", "CNratio", "abs_ratio_205_280", "solubleKL_perc_of_totalKL", "dev_100perc", "HWT2012", "losslab2b", "lossfield"))) %>%
dplyr::mutate(
dplyr::across(dplyr::any_of(c("holocellulose_mass_relative_mass", "sphagnan_mass_relative_mass", "phenolics_tannic_acid_equivalents_mass_relative_mass", "phenolics_PHBA_equivalents_mass_relative_mass", "N_relative_mass", "C_relative_mass", "P_relative_mass", "soluble_Klason_lignin_mass_relative_mass", "Klason_lignin_mass_relative_mass")), function(.x) {
.x %>%
units::set_units("mg/g") %>%
units::set_units("g/g") %>%
units::drop_units()
}),
dplyr::across(
dplyr::ends_with("_relative_mass"),
function(.x) NA_real_,
.names = "{.col}_absolute"
),
plot_label = {
digit <-
sample_label %>%
stringr::str_extract(pattern = "\\d+$") %>%
as.numeric()
digit <- ifelse(digit < 10, paste0("0", digit), as.character(digit))
rest <-
sample_label %>%
stringr::str_remove(pattern = "\\d+$")
paste0(rest, digit)
},
taxon_rank_value =
taxon_rank_value %>%
stringr::str_replace(pattern = "_", replacement = " "),
taxon_rank_name = "species",
shade =
dplyr::case_when(
shade == "1open" ~ "open",
shade == "2semi" ~ "semi-open",
shade == "3dark" ~ "shaded"
),
sampling_year = 2013,
sampling_month = 10,
sampling_day = NA_integer_,
sample_treatment = "control",
sample_type = "litter",
taxon_organ = "shoots",
sample_depth_upper = 1,
sample_depth_lower =
dplyr::case_when(
taxon_rank_value == "Sphagnum tenellum" ~ 3,
TRUE ~ 4
),
dplyr::across(dplyr::any_of(c("site_type", "microhabitat")), tolower),
comments_samples = "Coordinates are only the approximate location of the study site, but not sampling points. The exact day of sampling is unknown. Each sample corresponds to one separate patch (plot, identified by `plot_label`) on the peatlands from which samples were taken. Samples were collected at the same patches as samples from @Bengtsson.2016, but are not the same samples. I assumed that the capitulum of moss shoots had a length of 1 cm.",
comments_samples =
dplyr::case_when(
taxon_rank_value == "Sphagnum balticum" ~ paste0(comments_samples, " @Bengtsson.2018 mentions that for some replicates for S. balticum, shoots of a length of only 2 cm were collected."),
TRUE ~ comments_samples
),
id_dataset = datasets$id_dataset[[1]],
is_incubated = FALSE,
incubation_duration = 0.0,
incubation_environment = "peat",
id_sample = seq_len(nrow(.)) + id_last$id_sample,
id_sample_origin = id_sample,
id_sample_parent = id_sample,
id_sample_incubation_start = NA_integer_,
experimental_design =
paste0(
as.numeric(as.factor(sample_treatment)), "_",
as.numeric(as.factor(site_name)), "_",
as.numeric(as.factor(plot_label))
),
mass_absolute = NA_real_
) %>%
dplyr::rename_with(
.fn = function(.x) {
stringr::str_replace(.x, "_relative_mass_absolute$", "_absolute")
}
)
## combine
samples <-
dplyr::bind_rows(
db_template_tables$samples,
samples1 %>%
dplyr::mutate(
type = "samples1"
)
)
samples_to_samples <-
samples %>%
dplyr::filter(! id_sample %in% id_sample_origin) %>%
dplyr::mutate(
transition_description =
dplyr::case_when(
type %in% c("samples2") ~ "translocate",
type %in% c("samples3") ~ "wait",
TRUE ~ NA_character_
)
) %>%
dplyr::select(id_sample_parent, id_sample, transition_description) %>%
dplyr::rename(
id_sample_child = "id_sample"
)
d1 <-
samples1 %>%
tidyr::pivot_longer(
cols = dplyr::any_of(c("mass_absolute", "mass_relative_mass", "mesh_size_absolute", paste0(PeriodicTable:::periodicTable$symb, "_relative_mass"), paste0(PeriodicTable:::periodicTable$symb, "_absolute"),
paste0(c("soluble_Klason_lignin_mass", "Klason_lignin_mass", "holocellulose_mass", "sphagnan_mass", "phenolics_tannic_acid_equivalents_mass", "phenolics_PHBA_equivalents_mass", "cation_exchange_capacity", "soluble_Klason_lignin_mass"), "_relative_mass"),
paste0(c("soluble_Klason_lignin_mass", "Klason_lignin_mass", "holocellulose_mass", "sphagnan_mass", "phenolics_tannic_acid_equivalents_mass", "phenolics_PHBA_equivalents_mass", "cation_exchange_capacity", "soluble_Klason_lignin_mass"), "_absolute"))),
names_to = "attribute_name",
values_to = "value"
) %>%
dplyr::mutate(
id_measurement = seq_len(nrow(.)) + id_last$id_measurement,
id_measurement_numerator =
purrr::map_int(seq_len(nrow(.)), function(i) {
if(attribute_name[[i]] == "mass_relative_mass") {
id_measurement[id_sample == id_sample[[i]] & attribute_name == "mass_absolute"]
} else if (stringr::str_detect(attribute_name[[i]], pattern = "_relative_mass$")) {
id_measurement[id_sample == id_sample[[i]] & attribute_name == stringr::str_replace(attribute_name[[i]], "_relative_mass2?", "_absolute")]
} else {
NA_integer_
}
}),
id_measurement_denominator =
purrr::map_int(seq_len(nrow(.)), function(i) {
if(attribute_name[[i]] == "mass_relative_mass") {
id_measurement[id_sample == id_sample_incubation_start[[i]] & attribute_name == "mass_absolute"]
} else if (stringr::str_detect(attribute_name[[i]], pattern = "_relative_mass$")) {
id_measurement[id_sample == id_sample[[i]] & attribute_name == "mass_absolute"]
} else {
NA_integer_
}
}),
value_type = "point",
sample_size = 1L
)
# combine
d <-
dplyr::bind_rows(
db_template_tables$data,
d1
) %>%
dplyr::select(dplyr::all_of(colnames(db_template_tables$data)))
experimental_design_format <-
tibble::tibble(
id_dataset = datasets$id_dataset,
file = paste0(id_last$id_dataset + 1L, "/experimental_design_format.csv"),
experimental_design_description = "`site_name`: Character value: Site where the samples grew (and were collected). `plot_label`: Character value. Plot label where the samples grew (and from which samples were collected). `site_type`: Character type describing the peatland type. `shade`: Character value describing how much the floow is shaded by trees/shrubs."
)
# csv file to export
experimental_design_format2 <-
samples %>%
dplyr::filter(! is.na(experimental_design)) %>%
dplyr::filter(! duplicated(experimental_design)) %>%
dplyr::select(experimental_design, sample_treatment, site_name, plot_label, site_type, shade)
# export
write.csv(experimental_design_format2, paste0(dir_target, "/experimental_design_format.csv"), row.names = FALSE)
# list all tables
dm_insert_in <-
list(
datasets =
datasets %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$datasets))),
samples =
samples %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$samples))),
data =
d %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$data))),
samples_to_samples =
samples_to_samples %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$samples_to_samples))),
citations_to_datasets =
citations_to_datasets %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$citations_to_datasets))),
experimental_design_format =
experimental_design_format %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$experimental_design_format)))
)
# check whether all column names as present in table attributes
all_column_names <-
purrr::map(dm_insert_in, colnames) %>%
unlist() %>%
unique()
if(! all(all_column_names %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))) {
cond <- purrr::map_lgl(all_column_names, function(.x) ! .x %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))
RMariaDB::dbDisconnect(con)
stop(paste0("New `attribute_name`s discovered: ", paste(all_column_names[cond], collapse = ", ")))
}
all_data_attributes <- unique(dm_insert_in$data$attribute_name)
if(! all(all_data_attributes %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))) {
cond <- purrr::map_lgl(all_data_attributes, function(.x) ! .x %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))
stop(paste0("New `attribute_name`s discovered: ", paste(all_data_attributes[cond], collapse = ", ")))
RMariaDB::dbDisconnect(con)
}
# filter empty tables
dm_insert_in_check <-
dm_insert_in[purrr::map_lgl(dm_insert_in, function(x) nrow(x) > 0)] %>%
dm::as_dm() %>%
dp_dm_add_keys(dm_dpeatdecomposition)
# copy into dm_pmird
for(i in seq_along(dm_insert_in)) {
RMariaDB::dbAppendTable(con, name = names(dm_insert_in)[[i]], value = dm_insert_in[[i]])
}
RMariaDB::dbDisconnect(con)