# packages
library(metaDigitise)
library(magrittr)
library(tibble)
library(dplyr)
library(lubridate)
library(dpeatdecomposition)
library(dm)
library(RMariaDB)
Connect to database
# connect to database
con <-
RMariaDB::dbConnect(
drv = RMariaDB::MariaDB(),
dbname = "dpeatdecomposition",
default.file = "~/my.cnf"
)
# get database as dm object
dm_dpeatdecomposition <-
dpeatdecomposition::dp_get_dm(con, learn_keys = TRUE)
Get most current IDs
id_last <-
list(
id_dataset =
dm_dpeatdecomposition %>%
dm::pull_tbl(datasets) %>%
dplyr::pull(id_dataset) %>%
tail(1),
id_sample =
dm_dpeatdecomposition %>%
dm::pull_tbl(samples) %>%
dplyr::pull(id_sample) %>%
tail(1),
id_measurement =
dm_dpeatdecomposition %>%
dm::pull_tbl(data) %>%
dplyr::pull(id_measurement) %>%
tail(1)
) %>%
purrr::map(function(.x) {
if(length(.x) == 0) {
0L
} else {
.x
}
})
Create directories
dir_name <- "d18"
dir_source <- "../raw_data/data/d18"
dir_target <- paste0("../derived_data/", id_last$id_dataset + 1L)
if(!dir.exists(dir_target)) {
dir.create(dir_target)
}
datasets <-
tibble::tibble(
id_dataset = id_last$id_dataset + 1L
)
citations_to_datasets <-
dplyr::bind_rows(
db_template_tables$citations_to_datasets,
tibble::tibble(
id_dataset = datasets$id_dataset,
id_citation = c("Hiroki.1996")
)
)
## mass remaining
samples2 <-
tibble::tibble(
id_dataset = datasets$id_dataset[[1]],
sample_type = "cellulose",
sample_type2 = "cellulose filter tips Advantec No. 514A and No. 526",
id_site = c(2, 4, 6, 7, 13, 3, 8, 10, 12, 1, 5, 9, 11, 14),
mass_relative_mass = 1 - c(NA_real_, 0.58, 0.27, 0.52, 0.34, 0.53, 0.43, 0.75, 0.59, 0.37, 0.33, 0.03, 0.64, 0.07),
mass_absolute = NA_real_,
sample_microhabitat = c(rep("hummock", 5), rep("hollow", 4), NA, NA, "hollow", NA, NA),
experimental_design = as.character(id_site),
sampling_date = as.Date("1992-11-27"),
sampling_year = lubridate::year(sampling_date),
sampling_month = lubridate::month(sampling_date),
sampling_day = lubridate::day(sampling_date),
sample_depth_upper = 5,
sample_depth_lower = 5,
sample_treatment = "control",
is_incubated = TRUE,
incubation_environment = "peat",
incubation_duration = lubridate::time_length(as.Date("1992-11-27") - as.Date("1992-04-28"), unit = "days"),
sampling_longitude =
"139°35'E" %>%
sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
as.numeric(),
sampling_latitude =
"37°15'N" %>%
sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
as.numeric()
) %>%
dplyr::mutate(
comment_samples = "Coordinates are only the approximate location of the study site, but not sampling points.",
comment_samples =
paste0(comment_samples,
dplyr::case_when(
id_site %in% c(1, 11) ~ " Cellulose strips buried in peat along a stream.",
id_site %in% c(5, 14) ~ " Cellulose strips buried in peat in a stream.",
TRUE ~ ""
)
)
)
# initial samples
samples1 <-
samples2 %>%
dplyr::mutate(
id_sample = seq_len(nrow(.)) + id_last$id_sample,
id_sample_origin = id_sample,
id_sample_parent = id_sample,
id_sample_incubation_start = id_sample,
mass_relative_mass = 1,
incubation_duration = 0,
sampling_date = as.Date("1992-04-28"),
sampling_year = lubridate::year(sampling_date),
sampling_month = lubridate::month(sampling_date),
sampling_day = lubridate::day(sampling_date)
)
# assign ids
samples2 <-
samples2 %>%
dplyr::mutate(
id_sample = seq_len(nrow(.)) + max(samples1$id_sample),
id_sample_origin =
dplyr::left_join(
samples2 %>% dplyr::select(id_site),
samples1 %>% dplyr::select(id_sample, id_site),
by = "id_site"
) %>%
dplyr::pull(id_sample),
id_sample_parent = id_sample_origin,
id_sample_incubation_start = id_sample_origin
)
## peat properties
# order of id_site in plot x axes
id_sites <- c(2, 4, 6, 7, 13, 3, 8, 10, 12, 1, 5, 9, 11, 14)
samples3 <-
dplyr::bind_cols(
readRDS(paste0(dir_source, "/raw/caldat/Hiroki.1996-Fig2a"))$processed_data %>%
dplyr::select(y) %>%
dplyr::rename(
pH = "y"
),
readRDS(paste0(dir_source, "/raw/caldat/Hiroki.1996-Fig2c"))$processed_data %>%
dplyr::select(y) %>%
dplyr::rename(
C_relative_mass = "y"
),
readRDS(paste0(dir_source, "/raw/caldat/Hiroki.1996-Fig2d"))$processed_data %>%
dplyr::select(y) %>%
dplyr::rename(
N_relative_mass = "y"
),
readRDS(paste0(dir_source, "/raw/caldat/Hiroki.1996-Fig2f"))$processed_data %>%
dplyr::select(y) %>%
dplyr::rename(
ash_mass_relative_mass = "y"
)
) %>%
dplyr::mutate(
C_absolute = NA_real_,
N_absolute = NA_real_,
ash_mass_absolute = NA_real_,
id_site = id_sites,
sampling_date = as.Date("1992-08-24"), #---note: the paper mentions two pH measurement dates in the text, but does not clarify what values in the plot represent. I here assume these are values from the first sampling date.
sample_depth_upper = 0,
sample_depth_lower = 5
)
# water content
samples3 <-
dplyr::bind_rows(
readRDS(paste0(dir_source, "/raw/caldat/Hiroki.1996-Fig3a"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Hiroki.1996-Fig3b"))$processed_data,
readRDS(paste0(dir_source, "/raw/caldat/Hiroki.1996-Fig3c"))$processed_data
) %>%
dplyr::select(id, x, y) %>%
dplyr::mutate(
id_site =
id %>%
stringr::str_extract(pattern = "\\d+$") %>%
as.numeric(),
x = as.Date(c(rep(c("1992-04-28", "1992-06-22", "1992-08-24", "1992-10-27"), 2), c("1992-04-28", "1992-06-22", "1992-10-27"), rep(c("1992-04-28", "1992-06-22", "1992-08-24", "1992-10-27"), 11))),
y = y/(y + 1)
) %>%
dplyr::rename(
sampling_date = "x",
water_mass_relative_mass = "y"
) %>%
dplyr::mutate(
water_mass_absolute = NA_real_,
mass_absolute = NA_real_,
sample_depth_upper = 0, #---note: the text says only that a 5 cm deep layer was sampled.
sample_depth_lower = 5, #---note: the text says only that a 5 cm deep layer was sampled.
) %>%
dplyr::full_join(
samples3,
by = c("id_site", "sample_depth_upper", "sample_depth_lower", "sampling_date")
) %>%
dplyr::arrange(sampling_date) %>%
dplyr::mutate(
sample_wet_mass_absolute = NA_real_,
id_dataset = datasets$id_dataset[[1]],
id_sample = seq_len(nrow(.)) + max(samples2$id_sample),
id_sample_origin = id_sample,
id_sample_parent = id_sample,
sampling_year = lubridate::year(sampling_date),
sampling_month = lubridate::month(sampling_date),
sampling_day = lubridate::day(sampling_date),
sample_type = "peat",
is_incubated = FALSE,
incubation_duration = 0,
sample_treatment = "control",
comment_samples = "Coordinates are only the approximate location of the study site, but not sampling points.",
comment_samples =
paste0(comment_samples,
dplyr::case_when(
id_site %in% c(1, 11) ~ " Sampling point: along a stream.",
id_site %in% c(5, 14) ~ " Sampling point: in a stream.",
TRUE ~ ""
)
)
) %>%
dplyr::full_join(
samples1 %>%
dplyr::select(id_site, sample_microhabitat, sampling_longitude, sampling_latitude, experimental_design),
by = "id_site"
)
## combine
samples <-
dplyr::bind_rows(
db_template_tables$samples,
samples1 %>%
dplyr::mutate(
type = "samples1"
),
samples2 %>%
dplyr::mutate(
type = "samples2"
),
samples3 %>%
dplyr::mutate(
type = "samples3"
)
)
samples_to_samples <-
samples %>%
dplyr::filter(! id_sample %in% id_sample_origin) %>%
dplyr::mutate(
transition_description =
dplyr::case_when(
type %in% c("samples2") ~ "wait",
TRUE ~ NA_character_
)
) %>%
dplyr::select(id_sample_parent, id_sample, transition_description) %>%
dplyr::rename(
id_sample_child = "id_sample"
)
d1 <-
samples1 %>%
tidyr::pivot_longer(
cols = dplyr::all_of(c("mass_absolute", "mass_relative_mass")),
names_to = "attribute_name",
values_to = "value"
) %>%
dplyr::mutate(
id_measurement = seq_len(nrow(.)) + id_last$id_measurement,
id_measurement_numerator =
purrr::map_int(seq_len(nrow(.)), function(i) {
switch(
attribute_name[[i]],
"mass_relative_mass" = {
id_measurement[id_sample == id_sample[[i]] & attribute_name == "mass_absolute"]
},
"P_relative_mass" =,
"N_relative_mass" =,
"K_relative_mass" =,
"C_relative_mass" =,
"ash_mass_relative_mass" = {
id_measurement[id_sample == id_sample[[i]] & attribute_name == paste0(stringr::str_remove(attribute_name[[i]], "_relative_mass$"), "_absolute")]
},
NA_integer_
)
}),
id_measurement_denominator =
purrr::map_int(seq_len(nrow(.)), function(i) {
switch(
attribute_name[[i]],
"mass_relative_mass" =,
"P_relative_mass" =,
"N_relative_mass" =,
"K_relative_mass" =,
"C_relative_mass" =,
"ash_mass_relative_mass" = {
id_measurement[id_sample == id_sample[[i]] & attribute_name == "mass_absolute"]
},
NA_integer_
)
}),
value_type = "mean",
sample_size = NA_integer_
)
d2 <-
samples2 %>%
tidyr::pivot_longer(
cols = dplyr::all_of(c("mass_absolute", "mass_relative_mass")),
names_to = "attribute_name",
values_to = "value"
) %>%
dplyr::mutate(
id_measurement = seq_len(nrow(.)) + max(d1$id_measurement),
id_measurement_numerator =
purrr::map_int(seq_len(nrow(.)), function(i) {
switch(
attribute_name[[i]],
"mass_relative_mass" = {
id_measurement[id_sample == id_sample[[i]] & attribute_name == "mass_absolute"]
},
NA_integer_
)
}),
id_measurement_denominator =
purrr::map_int(seq_len(nrow(.)), function(i) {
switch(
attribute_name[[i]],
"mass_relative_mass" = {
d1$id_measurement[d1$id_sample == id_sample_origin[[i]] & d1$attribute_name == "mass_absolute"]
},
NA_integer_
)
}),
value_type = "mean",
sample_size = NA_integer_
)
d3 <-
samples3 %>%
tidyr::pivot_longer(
cols = dplyr::all_of(c("mass_absolute", "sample_wet_mass_absolute", "water_mass_relative_mass", "water_mass_absolute", "pH", "C_relative_mass", "N_relative_mass", "ash_mass_relative_mass", "C_absolute", "N_absolute", "ash_mass_absolute")),
names_to = "attribute_name",
values_to = "value"
) %>%
dplyr::mutate(
id_measurement = seq_len(nrow(.)) + max(d2$id_measurement),
id_measurement_numerator =
purrr::map_int(seq_len(nrow(.)), function(i) {
switch(
attribute_name[[i]],
"N_relative_mass" =,
"C_relative_mass" =,
"ash_mass_relative_mass" = {
id_measurement[id_sample == id_sample[[i]] & attribute_name == paste0(stringr::str_remove(attribute_name[[i]], "_relative_mass$"), "_absolute")]
},
"water_mass_relative_mass" = {
id_measurement[id_sample == id_sample[[i]] & attribute_name == "water_mass_absolute"]
},
NA_integer_
)
}),
id_measurement_denominator =
purrr::map_int(seq_len(nrow(.)), function(i) {
switch(
attribute_name[[i]],
"water_mass_relative_mass" = {
id_measurement[id_sample == id_sample[[i]] & attribute_name == "sample_wet_mass_absolute"]
},
"N_relative_mass" =,
"C_relative_mass" =,
"ash_mass_relative_mass" = {
id_measurement[id_sample == id_sample[[i]] & attribute_name == "mass_absolute"]
},
"mass_relative_mass" = {
d1$id_measurement[d1$id_sample == id_sample_origin[[i]] & d1$attribute_name == "mass_absolute"]
},
NA_integer_
)
}),
value_type = "mean",
sample_size = NA_integer_
)
# combine
d <-
dplyr::bind_rows(
db_template_tables$data,
d1,
d2,
d3
) %>%
dplyr::select(dplyr::all_of(colnames(db_template_tables$data)))
experimental_design_format <-
tibble::tibble(
id_dataset = datasets$id_dataset,
file = paste0(id_last$id_dataset + 1L, "/experimental_design_format.csv"),
experimental_design_description = "`id_site`: An identifier for the site."
)
# csv file to export
experimental_design_format2 <-
samples %>%
dplyr::filter(! is.na(experimental_design)) %>%
dplyr::filter(! duplicated(experimental_design)) %>%
dplyr::select(experimental_design, id_site)
# export
write.csv(experimental_design_format2, paste0(dir_target, "/experimental_design_format.csv"), row.names = FALSE)
# list all tables
dm_insert_in <-
list(
datasets =
datasets %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$datasets))),
samples =
samples %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$samples))),
data =
d %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$data))),
samples_to_samples =
samples_to_samples %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$samples_to_samples))),
citations_to_datasets =
citations_to_datasets %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$citations_to_datasets))),
experimental_design_format =
experimental_design_format %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$experimental_design_format)))
)
# check whether all column names as present in table attributes
all_column_names <-
purrr::map(dm_insert_in, colnames) %>%
unlist() %>%
unique()
if(! all(all_column_names %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))) {
cond <- purrr::map_lgl(all_column_names, function(.x) ! .x %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))
RMariaDB::dbDisconnect(con)
stop(paste0("New `attribute_name`s discovered: ", paste(all_column_names[cond], collapse = ", ")))
}
all_data_attributes <- unique(dm_insert_in$data$attribute_name)
if(! all(all_data_attributes %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))) {
RMariaDB::dbDisconnect(con)
cond <- purrr::map_lgl(all_data_attributes, function(.x) ! .x %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))
stop(paste0("New `attribute_name`s discovered: ", paste(all_data_attributes[cond], collapse = ", ")))
}
# filter empty tables
dm_insert_in_check <-
dm_insert_in[purrr::map_lgl(dm_insert_in, function(x) nrow(x) > 0)] %>%
dm::as_dm() %>%
dp_dm_add_keys(dm_dpeatdecomposition)
# copy into dm_pmird
for(i in seq_along(dm_insert_in)) {
RMariaDB::dbAppendTable(con, name = names(dm_insert_in)[[i]], value = dm_insert_in[[i]])
}
RMariaDB::dbDisconnect(con)