# packages
library(metaDigitise)
library(magrittr)
library(tibble)
library(dplyr)
library(lubridate)
library(dpeatdecomposition)
library(dm)
library(RMariaDB)
Connect to database
# connect to database
con <-
RMariaDB::dbConnect(
drv = RMariaDB::MariaDB(),
dbname = "dpeatdecomposition",
default.file = "~/my.cnf"
)
# get database as dm object
dm_dpeatdecomposition <-
dpeatdecomposition::dp_get_dm(con, learn_keys = TRUE)
Get most current IDs
id_last <-
list(
id_dataset =
dm_dpeatdecomposition %>%
dm::pull_tbl(datasets) %>%
dplyr::pull(id_dataset) %>%
tail(1),
id_sample =
dm_dpeatdecomposition %>%
dm::pull_tbl(samples) %>%
dplyr::pull(id_sample) %>%
tail(1),
id_measurement =
dm_dpeatdecomposition %>%
dm::pull_tbl(data) %>%
dplyr::pull(id_measurement) %>%
tail(1)
) %>%
purrr::map(function(.x) {
if(length(.x) == 0) {
0L
} else {
.x
}
})
Create directories
dir_name <- "d35"
dir_source <- "../raw_data/data/d35"
dir_target <- paste0("../derived_data/", id_last$id_dataset + 1L)
if(!dir.exists(dir_target)) {
dir.create(dir_target)
}
datasets <-
tibble::tibble(
id_dataset = id_last$id_dataset + 1L
)
citations_to_datasets <-
dplyr::bind_rows(
db_template_tables$citations_to_datasets,
tibble::tibble(
id_dataset = datasets$id_dataset,
id_citation = c("Trinder.2008")
)
)
# mass remaining
samples2 <-
readODS::read_ods(paste0(dir_source, "/derived/Trinder.2008-Tab2.ods")) %>%
dplyr::rename(
mass_relative_mass = "mass_remaining",
mass_relative_mass_error = "mass_remaining_error",
mass_relative_mass_error_type = "mass_remaining_error_type",
mass_relative_mass_sample_size = "mass_remaining_sample_size",
comments_samples = "comment_samples",
mesh_size_absolute = "mesh_size",
sample_treatment = "treatment",
sampling_longitude = "site_longitude",
sampling_latitude = "site_latitude"
) %>%
dplyr::mutate(
sampling_year = 2004,
sampling_date =
as.Date(as.Date("2004-12-01") + lubridate::dweeks(incubation_duration)),
sampling_year = lubridate::year(sampling_date),
sampling_month = lubridate::month(sampling_date),
sampling_day = NA_real_,
dplyr::across(dplyr::any_of(c("mass_relative_mass", "mass_relative_mass_error")), function(.x) .x/100),
site_label = site_name,
mass_absolute = NA_real_,
is_incubated = TRUE,
incubation_environment = "peat",
id_dataset = datasets$id_dataset[[1]],
experimental_design =
paste0(
as.numeric(as.factor(site_label)), "_",
as.numeric(as.factor(incubation_plot_type))
),
type =
dplyr::case_when(
incubation_duration == 0 ~ "samples2",
TRUE ~ "samples3"
),
incubation_duration =
incubation_duration %>%
lubridate::dweeks() %>%
lubridate::time_length(unit = "days")
)
# litter collection
samples1 <-
samples2 %>%
dplyr::filter(!duplicated(paste0(taxon_rank_value, "_", taxon_organ))) %>%
dplyr::mutate(
id_sample = seq_len(nrow(.)) + id_last$id_sample,
id_sample_origin = id_sample,
id_sample_parent = id_sample,
id_sample_incubation_start = NA_integer_,
site_label =
dplyr::case_when(
taxon_rank_value == "Picea sitchensis" ~ NA_character_,
TRUE ~ site_label
),
site_name = site_label,
sampling_longitude =
dplyr::case_when(
taxon_rank_value == "Picea sitchensis" ~ NA_real_,
TRUE ~ sampling_longitude
),
sampling_latitude =
dplyr::case_when(
taxon_rank_value == "Picea sitchensis" ~ NA_real_,
TRUE ~ sampling_latitude
),
sample_treatment = "control",
experimental_design = NA_character_,
is_incubated = FALSE,
incubation_environment = NA_character_,
sample_depth_upper =
dplyr::case_when(
stringr::str_detect(taxon_rank_value, "Sphagnum") ~ 1,
TRUE ~ NA_real_
),
sample_depth_lower = NA_real_,
sampling_date = as.Date("2004-11-15"),
sampling_year = lubridate::year(sampling_date),
sampling_month = lubridate::month(sampling_date),
sampling_day = NA_real_ ,
incubation_duration =
incubation_duration %>%
lubridate::dweeks() %>%
lubridate::time_length(unit = "days")
)
# add missing ids
samples2 <-
samples2 %>%
dplyr::mutate(
id_sample = seq_len(nrow(.)) + max(samples1$id_sample),
id_sample_origin =
dplyr::left_join(
samples2 %>% dplyr::select(taxon_rank_value, taxon_organ),
samples1 %>% dplyr::select(taxon_rank_value, taxon_organ, id_sample),
by = c("taxon_rank_value", "taxon_organ")
) %>%
dplyr::pull(id_sample),
id_sample_incubation_start =
purrr::map_int(seq_len(nrow(.)), function(i) {
index <- paste0(taxon_rank_value, "_", site_label, "_", taxon_organ, "_", sample_depth_upper, "_", sample_depth_lower) == paste0(taxon_rank_value, "_", site_label, "_", taxon_organ, "_", sample_depth_upper, "_", sample_depth_lower)[[i]] & experimental_design == experimental_design[[i]] & incubation_duration == 0.0
id_sample[index]
}),
id_sample_parent =
purrr::map_int(seq_len(nrow(.)), function(i) {
index <- paste0(taxon_rank_value, "_", site_label, "_", taxon_organ, "_", sample_depth_upper, "_", sample_depth_lower) == paste0(taxon_rank_value, "_", site_label, "_", taxon_organ, "_", sample_depth_upper, "_", sample_depth_lower)[[i]] & experimental_design == experimental_design[[i]] & incubation_duration < incubation_duration[[i]]
if(! any(index)) {
id_sample_origin[[i]]
} else {
target_incubation_duration <- max(incubation_duration[index])
index <- index & incubation_duration == target_incubation_duration
id_sample[index]
}
})
) %>%
dplyr::mutate(
sample_type = "litter"
)
# litter chemistry
d35_peat_litter_chemistry <-
readODS::read_ods(paste0(dir_source, "/derived/Trinder.2008-Tab1.ods")) %>%
setNames(
nm =
colnames(.) %>%
stringr::str_replace(pattern = "^C", replacement = "C_relative_mass") %>%
stringr::str_replace(pattern = "^N", replacement = "N_relative_mass") %>%
stringr::str_replace(pattern = "^P", replacement = "P_relative_mass")
) %>%
dplyr::mutate(
dplyr::across(dplyr::starts_with(c("C", "N", "P")) & where(is.numeric), function(.x) .x/1000)
)
# C
d35_C <-
dplyr::bind_rows(
readRDS(paste0(dir_source, "/raw/caldat/Trinder.2008-Fig1a"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Trinder.2008-Fig1b"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Trinder.2008-Fig1c"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Trinder.2008-Fig1d"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Trinder.2008-Fig1e"))$processed_data
) %>%
dplyr::mutate(
incubation_plot_type =
dplyr::case_when(
stringr::str_detect(id, pattern = "low") ~ "low_water_table_level",
stringr::str_detect(id, pattern = "medium") ~ "medium_water_table_level",
stringr::str_detect(id, pattern = "high") ~ "high_water_table_level"
),
taxon_rank_value =
dplyr::case_when(
stringr::str_detect(id, pattern = "Callunavulgaris") ~ "Calluna vulgaris",
stringr::str_detect(id, pattern = "Eriophorumangustifolium") ~ "Eriophorum angustifolium",
stringr::str_detect(id, pattern = "Eriophorumvaginatum") ~ "Eriophorum vaginatum",
stringr::str_detect(id, pattern = "Piceasithensis") ~ "Picea sitchensis",
stringr::str_detect(id, pattern = "Sphagnumauriculatum") ~ "Sphagnum auriculatum"
),
incubation_duration =
dplyr::case_when(
stringr::str_detect(id, pattern = "sampling1") ~ 28,
stringr::str_detect(id, pattern = "sampling2") ~ 56,
stringr::str_detect(id, pattern = "sampling3") ~ 80
),
mean = mean/100,
error = error/100,
C_relative_mass2_error_type = "se",
C_relative_mass2_sample_size = 6L
)
d35_C <-
dplyr::bind_rows(
d35_C,
d35_C %>%
dplyr::filter(incubation_duration == 28) %>%
dplyr::mutate(
incubation_duration = 0,
mean = 1,
error = 0
)
) %>%
dplyr::rename(
C_relative_mass2 = "mean",
C_relative_mass2_error = "error"
) %>%
dplyr::select(-id, -n, -variable)
# N
d35_N <-
dplyr::bind_rows(
readRDS(paste0(dir_source, "/raw/caldat/Trinder.2008-Fig1f"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Trinder.2008-Fig1g"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Trinder.2008-Fig1h"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Trinder.2008-Fig1i"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Trinder.2008-Fig1j"))$processed_data
) %>%
dplyr::mutate(
incubation_plot_type =
dplyr::case_when(
stringr::str_detect(id, pattern = "low") ~ "low_water_table_level",
stringr::str_detect(id, pattern = "medium") ~ "medium_water_table_level",
stringr::str_detect(id, pattern = "high") ~ "high_water_table_level"
),
taxon_rank_value =
dplyr::case_when(
stringr::str_detect(id, pattern = "Callunavulgaris") ~ "Calluna vulgaris",
stringr::str_detect(id, pattern = "Eriophorumangustifolium") ~ "Eriophorum angustifolium",
stringr::str_detect(id, pattern = "Eriophorumvaginatum") ~ "Eriophorum vaginatum",
stringr::str_detect(id, pattern = "Piceasithensis") ~ "Picea sitchensis",
stringr::str_detect(id, pattern = "Sphagnumauriculatum") ~ "Sphagnum auriculatum"
),
incubation_duration =
dplyr::case_when(
stringr::str_detect(id, pattern = "sampling1") ~ 28,
stringr::str_detect(id, pattern = "sampling2") ~ 56,
stringr::str_detect(id, pattern = "sampling3") ~ 80
),
mean = mean/100,
error = error/100,
N_relative_mass2_error_type = "se",
N_relative_mass2_sample_size = 6L
)
d35_N <-
dplyr::bind_rows(
d35_N,
d35_N %>%
dplyr::filter(incubation_duration == 28) %>%
dplyr::mutate(
incubation_duration = 0,
mean = 1,
error = 0
)
) %>%
dplyr::rename(
N_relative_mass2 = "mean",
N_relative_mass2_error = "error"
) %>%
dplyr::select(-id, -n, -variable)
# P
d35_P <-
dplyr::bind_rows(
readRDS(paste0(dir_source, "/raw/caldat/Trinder.2008-Fig1k"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Trinder.2008-Fig1l"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Trinder.2008-Fig1m"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Trinder.2008-Fig1n"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Trinder.2008-Fig1o"))$processed_data
) %>%
dplyr::mutate(
incubation_plot_type =
dplyr::case_when(
stringr::str_detect(id, pattern = "low") ~ "low_water_table_level",
stringr::str_detect(id, pattern = "medium") ~ "medium_water_table_level",
stringr::str_detect(id, pattern = "high") ~ "high_water_table_level"
),
taxon_rank_value =
dplyr::case_when(
stringr::str_detect(id, pattern = "Callunavulgaris") ~ "Calluna vulgaris",
stringr::str_detect(id, pattern = "Eriophorumangustifolium") ~ "Eriophorum angustifolium",
stringr::str_detect(id, pattern = "Eriophorumvaginatum") ~ "Eriophorum vaginatum",
stringr::str_detect(id, pattern = "Piceasithensis") ~ "Picea sitchensis",
stringr::str_detect(id, pattern = "Sphagnumauriculatum") ~ "Sphagnum auriculatum"
),
incubation_duration =
dplyr::case_when(
stringr::str_detect(id, pattern = "sampling1") ~ 28,
stringr::str_detect(id, pattern = "sampling2") ~ 56,
stringr::str_detect(id, pattern = "sampling3") ~ 80
),
mean = mean/100,
error = error/100,
P_relative_mass2_error_type = "se",
P_relative_mass2_sample_size = 6L
)
d35_P <-
dplyr::bind_rows(
d35_P,
d35_P %>%
dplyr::filter(incubation_duration == 28) %>%
dplyr::mutate(
incubation_duration = 0,
mean = 1,
error = 0
)
) %>%
dplyr::rename(
P_relative_mass2 = "mean",
P_relative_mass2_error = "error"
) %>%
dplyr::select(-id, -n, -variable)
# add to samples2
samples2 <-
dplyr::left_join(
samples2,
d35_peat_litter_chemistry %>%
dplyr::mutate(
incubation_duration = 0.0
) %>%
dplyr::select(dplyr::all_of(c("taxon_rank_value", "incubation_duration")) | dplyr::starts_with(c("C_", "N_", "P_"))),
by = c("taxon_rank_value", "incubation_duration")
) %>%
dplyr::left_join(
purrr::reduce(list(d35_C, d35_N, d35_P), dplyr::left_join, by = c("incubation_plot_type", "taxon_rank_value", "incubation_duration")),
by = c("incubation_plot_type", "taxon_rank_value", "incubation_duration")
) %>%
dplyr::mutate(
C_absolute = NA_real_,
N_absolute = NA_real_,
P_absolute = NA_real_
)
# information on sites
samples4 <-
readODS::read_ods(paste0(dir_source, "/derived/Trinder.2008-text.ods")) %>%
dplyr::mutate(
id_dataset = datasets$id_dataset[[1]],
id_sample = seq_len(nrow(.)) + max(samples2$id_sample),
id_sample_origin = id_sample,
id_sample_parent = id_sample,
id_sample_incubation_start = NA_integer_,
is_incubated = FALSE,
incubation_duration = 0.0
) %>%
dplyr::left_join(
samples2 %>%
dplyr::filter(!duplicated(incubation_plot_type)) %>%
dplyr::select(incubation_plot_type, site_label, site_name, sampling_longitude, sampling_latitude, sample_treatment, comments_samples),
by = "incubation_plot_type"
)
## combine
samples <-
dplyr::bind_rows(
db_template_tables$samples,
samples1 %>%
dplyr::mutate(
type = "samples1"
),
samples2,
samples4 %>%
dplyr::mutate(
type = "samples4"
)
)
samples_to_samples <-
samples %>%
dplyr::filter(! id_sample %in% id_sample_origin) %>%
dplyr::mutate(
transition_description =
dplyr::case_when(
type %in% c("samples2") ~ "translocate",
type %in% c("samples3") ~ "wait",
TRUE ~ NA_character_
)
) %>%
dplyr::select(id_sample_parent, id_sample, transition_description) %>%
dplyr::rename(
id_sample_child = "id_sample"
)
d2 <-
samples2 %>%
tidyr::pivot_longer(
cols = dplyr::all_of(c("mass_absolute", "mass_relative_mass", "mesh_size_absolute", "N_relative_mass2", "C_relative_mass2", "P_relative_mass2", "C_absolute", "N_absolute", "P_absolute")),
names_to = "attribute_name",
values_to = "value"
) %>%
dplyr::mutate(
id_measurement = seq_len(nrow(.)) + id_last$id_measurement,
id_measurement_numerator =
purrr::map_int(seq_len(nrow(.)), function(i) {
switch(
attribute_name[[i]],
"N_relative_mass2" = ,
"C_relative_mass2" = ,
"P_relative_mass2" = {
id_measurement[id_sample == id_sample[[i]] & attribute_name == stringr::str_replace(attribute_name[[i]], "_relative_mass2?", "_absolute")]
},
"mass_relative_mass" = {
id_measurement[id_sample == id_sample[[i]] & attribute_name == "mass_absolute"]
},
NA_integer_
)
}),
id_measurement_denominator =
purrr::map_int(seq_len(nrow(.)), function(i) {
switch(
attribute_name[[i]],
"C_relative_mass2" = ,
"N_relative_mass2" = ,
"P_relative_mass2" = {
id_measurement[id_sample == id_sample_incubation_start[[i]] & attribute_name == stringr::str_replace(attribute_name[[i]], "_relative_mass2?", "_absolute")]
},
"mass_relative_mass" = {
id_measurement[id_sample == id_sample_incubation_start[[i]] & attribute_name == "mass_absolute"]
},
NA_integer_
)
}),
value_type =
dplyr::case_when(
attribute_name == "mesh_size_absolute" ~ "point",
TRUE ~ "mean"
)
)
d2_sample_size <-
samples2 %>%
tidyr::pivot_longer(
cols = dplyr::ends_with("_sample_size"),
names_to = "attribute_name",
values_to = "sample_size"
) %>%
dplyr::mutate(
attribute_name =
attribute_name %>%
stringr::str_remove(pattern = "_sample_size$")
) %>%
dplyr::select(id_sample, attribute_name, sample_size)
d2_error <-
samples2 %>%
tidyr::pivot_longer(
cols = dplyr::ends_with(c("_error")),
names_to = "attribute_name",
values_to = "error"
) %>%
dplyr::mutate(
attribute_name =
attribute_name %>%
stringr::str_remove(pattern = "_error$")
) %>%
dplyr::select(id_sample, attribute_name, error)
d2_error_type <-
samples2 %>%
tidyr::pivot_longer(
cols = dplyr::ends_with(c("_error_type")),
names_to = "attribute_name",
values_to = "error_type"
) %>%
dplyr::mutate(
attribute_name =
attribute_name %>%
stringr::str_remove(pattern = "_error_type$")
) %>%
dplyr::select(id_sample, attribute_name, error_type)
d2 <-
d2 %>%
dplyr::mutate(
error =
dplyr::left_join(d2, d2_error, by = c("id_sample", "attribute_name")) %>%
dplyr::pull(error),
error_type =
dplyr::left_join(d2, d2_error_type, by = c("id_sample", "attribute_name")) %>%
dplyr::pull(error_type),
sample_size =
dplyr::left_join(d2, d2_sample_size, by = c("id_sample", "attribute_name")) %>%
dplyr::pull(sample_size)
)
# correct attribute names
d2 <-
d2 %>%
dplyr::mutate(
attribute_name =
dplyr::case_when(
attribute_name %in% c("C_relative_mass2", "N_relative_mass2", "P_relative_mass2") ~ stringr::str_remove(attribute_name, "2$"),
TRUE ~ attribute_name
)
)
# site information
d4 <-
samples4 %>%
tidyr::pivot_longer(
cols = dplyr::all_of(c("water_table_depth")),
names_to = "attribute_name",
values_to = "value"
) %>%
dplyr::mutate(
id_measurement = seq_len(nrow(.)) + max(d2$id_measurement),
value_type = "mean",
comments_measurements = samples4$comment_measurements[[1]]
)
d4_sample_size <-
samples4 %>%
tidyr::pivot_longer(
cols = dplyr::ends_with("_sample_size"),
names_to = "attribute_name",
values_to = "sample_size"
) %>%
dplyr::mutate(
attribute_name =
attribute_name %>%
stringr::str_remove(pattern = "_sample_size$")
) %>%
dplyr::select(id_sample, attribute_name, sample_size)
d4_error <-
samples4 %>%
tidyr::pivot_longer(
cols = dplyr::ends_with(c("_error")),
names_to = "attribute_name",
values_to = "error"
) %>%
dplyr::mutate(
attribute_name =
attribute_name %>%
stringr::str_remove(pattern = "_error$")
) %>%
dplyr::select(id_sample, attribute_name, error)
d4_error_type <-
samples4 %>%
tidyr::pivot_longer(
cols = dplyr::ends_with(c("_error_type")),
names_to = "attribute_name",
values_to = "error_type"
) %>%
dplyr::mutate(
attribute_name =
attribute_name %>%
stringr::str_remove(pattern = "_error_type$")
) %>%
dplyr::select(id_sample, attribute_name, error_type)
d4 <-
d4 %>%
dplyr::mutate(
error =
dplyr::left_join(d4, d4_error, by = c("id_sample", "attribute_name")) %>%
dplyr::pull(error),
error_type =
dplyr::left_join(d4, d4_error_type, by = c("id_sample", "attribute_name")) %>%
dplyr::pull(error_type),
sample_size =
dplyr::left_join(d4, d4_sample_size, by = c("id_sample", "attribute_name")) %>%
dplyr::pull(sample_size)
)
# combine
d <-
dplyr::bind_rows(
db_template_tables$data,
d2,
d4,
) %>%
dplyr::select(dplyr::all_of(colnames(db_template_tables$data)))
experimental_design_format <-
tibble::tibble(
id_dataset = datasets$id_dataset,
file = paste0(id_last$id_dataset + 1L, "/experimental_design_format.csv"),
experimental_design_description = "`site_name`: Name of the site. `incubation_plot_type`: Description of the treatment (see the article for details)."
)
# csv file to export
experimental_design_format2 <-
samples %>%
dplyr::filter(! is.na(experimental_design)) %>%
dplyr::filter(! duplicated(experimental_design)) %>%
dplyr::select(experimental_design, site_name, incubation_plot_type)
# export
write.csv(experimental_design_format2, paste0(dir_target, "/experimental_design_format.csv"), row.names = FALSE)
# list all tables
dm_insert_in <-
list(
datasets =
datasets %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$datasets))),
samples =
samples %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$samples))),
data =
d %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$data))),
samples_to_samples =
samples_to_samples %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$samples_to_samples))),
citations_to_datasets =
citations_to_datasets %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$citations_to_datasets))),
experimental_design_format =
experimental_design_format %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$experimental_design_format)))
)
# check whether all column names as present in table attributes
all_column_names <-
purrr::map(dm_insert_in, colnames) %>%
unlist() %>%
unique()
if(! all(all_column_names %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))) {
cond <- purrr::map_lgl(all_column_names, function(.x) ! .x %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))
RMariaDB::dbDisconnect(con)
stop(paste0("New `attribute_name`s discovered: ", paste(all_column_names[cond], collapse = ", ")))
}
all_data_attributes <- unique(dm_insert_in$data$attribute_name)
if(! all(all_data_attributes %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))) {
RMariaDB::dbDisconnect(con)
cond <- purrr::map_lgl(all_data_attributes, function(.x) ! .x %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))
stop(paste0("New `attribute_name`s discovered: ", paste(all_data_attributes[cond], collapse = ", ")))
}
# filter empty tables
dm_insert_in_check <-
dm_insert_in[purrr::map_lgl(dm_insert_in, function(x) nrow(x) > 0)] %>%
dm::as_dm() %>%
dp_dm_add_keys(dm_dpeatdecomposition)
# copy into dm_pmird
for(i in seq_along(dm_insert_in)) {
RMariaDB::dbAppendTable(con, name = names(dm_insert_in)[[i]], value = dm_insert_in[[i]])
}
RMariaDB::dbDisconnect(con)