# packages
library(metaDigitise)
library(magrittr)
library(tibble)
library(dplyr)
library(lubridate)
library(dpeatdecomposition)
library(dm)
library(RMariaDB)
Connect to database
# connect to database
con <-
RMariaDB::dbConnect(
drv = RMariaDB::MariaDB(),
dbname = "dpeatdecomposition",
default.file = "~/my.cnf"
)
# get database as dm object
dm_dpeatdecomposition <-
dpeatdecomposition::dp_get_dm(con, learn_keys = TRUE)
Get most current IDs
id_last <-
list(
id_dataset =
dm_dpeatdecomposition %>%
dm::pull_tbl(datasets) %>%
dplyr::pull(id_dataset) %>%
tail(1),
id_sample =
dm_dpeatdecomposition %>%
dm::pull_tbl(samples) %>%
dplyr::pull(id_sample) %>%
tail(1),
id_measurement =
dm_dpeatdecomposition %>%
dm::pull_tbl(data) %>%
dplyr::pull(id_measurement) %>%
tail(1)
) %>%
purrr::map(function(.x) {
if(length(.x) == 0) {
0L
} else {
.x
}
})
Create directories
dir_name <- "d12"
dir_source <- "../raw_data/data/d12"
dir_target <- paste0("../derived_data/", id_last$id_dataset + 1L)
if(!dir.exists(dir_target)) {
dir.create(dir_target)
}
datasets <-
tibble::tibble(
id_dataset = id_last$id_dataset + 1L
)
citations_to_datasets <-
dplyr::bind_rows(
db_template_tables$citations_to_datasets,
tibble::tibble(
id_dataset = datasets$id_dataset,
id_citation = c("Farrish.1985")
)
)
# original material
samples1 <-
tibble::tibble(
site_name = "Marcell Experimental Forest",
sampling_longitude = -93.459632,
sampling_latitude = 47.519077,
sample_depth_upper = 1,
sample_depth_lower = 10,
sampling_year = 1981,
sampling_month = 10,
sampling_day = NA_integer_,
sample_type = rep(c("litter", "paper"), each = 2L),
sample_type2 = sample_type,
sample_microhabitat = rep(c("hummock", "hollow"), 2L),
treatment = "control",
incubation_environment = NA_character_,
incubation_duration = 0,
is_incubated = 0L,
comment_samples = "No exact location given in the paper, approximate location of the Marcell experimental forest. `sample_depth_upper`: Assumed based on description that living material was removed and that most of the sample are mosses."
) %>%
dplyr::mutate(
id_dataset = id_last$id_dataset + 1L,
id_sample = id_last$id_sample + seq_along(id_dataset),
id_sample_origin = id_sample,
id_sample_parent = id_sample
)
# insert within litterbags into the peat
samples2 <-
samples1 %>%
dplyr::mutate(
id_sample = seq_len(nrow(.)) + max(id_sample),
id_sample_origin = samples1$id_sample,
id_sample_parent = id_sample_origin,
id_sample_incubation_start = id_sample,
sampling_month = 11L,
sample_depth_upper = 2,
sample_depth_lower = 2,
is_incubated = 1L,
comment_samples = "No exact location given in the paper, approximate location of the Marcell experimental forest. `sample_depth_upper`, `sample_depth_lower`: Assumed based on description that litterbags were placed just beneath the surface."
)
# first time to recollect litterbags
samples3 <-
samples2 %>%
dplyr::mutate(
id_sample_parent = id_sample,
id_sample = seq_len(nrow(.)) + max(id_sample),
sampling_year = 1982,
sampling_month = 11L,
sample_depth_upper = 2,
sample_depth_lower = 2,
incubation_duration = 365,
is_incubated = 1L,
comment_samples = "No exact location given in the paper, approximate location of the Marcell experimental forest. `sample_depth_upper`, `sample_depth_lower`: Assumed based on description that litterbags were placed just beneath the surface. `incubation_duration`: Originally given as one year."
)
## additional masurements
# water table depths in hollow
samples4 <-
readRDS(paste0(dir_source, "/raw/caldat/Farrish.1985-Fig1a"))$processed_data %>%
dplyr::select(x, y) %>%
dplyr::rename(
water_table_depth = "y"
) %>%
dplyr::mutate(
site_name = "Marcell Experimental Forest",
sampling_longitude = -93.459632,
sampling_latitude = 47.519077,
sample_depth_upper = 0,
sample_depth_lower = 0,
sampling_date = as.Date("1982-06-01") + lubridate::days(as.integer(x)),
sampling_year = lubridate::year(sampling_date),
sampling_month = lubridate::month(sampling_date),
sampling_day = lubridate::day(sampling_date),
sample_type = "peat",
sample_type2 = sample_type,
sample_microhabitat = "hollow",
treatment = "control",
incubation_environment = NA_character_,
incubation_duration = 0,
is_incubated = 0L,
comment_samples = "No exact location given in the paper, approximate location of the Marcell experimental forest. `sampling_year`, `sampling_month`, `sampling_day`: Extracted from plot."
) %>%
dplyr::mutate(
id_dataset = id_last$id_dataset + 1L,
id_sample = max(samples3$id_sample) + seq_along(id_dataset),
id_sample_origin = id_sample,
id_sample_parent = id_sample
)
# water table depths in hummock
samples5 <-
samples4 %>%
dplyr::mutate(
sample_microhabitat = "hummock",
id_sample = max(samples4$id_sample) + seq_along(id_dataset),
id_sample_origin = id_sample,
id_sample_parent = id_sample,
water_table_depth = water_table_depth + 35
)
# temperature
samples6 <-
readRDS(paste0(dir_source, "/raw/caldat/Farrish.1985-Fig1b"))$processed_data %>%
dplyr::select(id, x, y) %>%
dplyr::rename(
temperature = "y",
sample_microhabitat = "id"
) %>%
dplyr::mutate(
temperature = temperature + 273.15,
site_name = "Marcell Experimental Forest",
sampling_longitude = -93.459632,
sampling_latitude = 47.519077,
sample_depth_upper = 10,
sample_depth_lower = 10,
sampling_date = as.Date("1982-06-01") + lubridate::days(as.integer(x)),
sampling_year = lubridate::year(sampling_date),
sampling_month = lubridate::month(sampling_date),
sampling_day = lubridate::day(sampling_date),
sample_type = "peat",
sample_type2 = sample_type,
treatment = "control",
incubation_environment = NA_character_,
incubation_duration = 0,
is_incubated = 0L,
comment_samples = "No exact location given in the paper, approximate location of the Marcell experimental forest. `sampling_year`, `sampling_month`, `sampling_day`: Extracted from plot."
) %>%
dplyr::mutate(
id_dataset = id_last$id_dataset + 1L,
id_sample = max(samples5$id_sample) + seq_along(id_dataset),
id_sample_origin = id_sample,
id_sample_parent = id_sample
)
# combine
samples <-
dplyr::bind_rows(
db_template_tables$samples,
samples1 %>%
dplyr::mutate(
type = "samples1"
),
samples2 %>%
dplyr::mutate(
type = "samples2"
),
samples3 %>%
dplyr::mutate(
type = "samples3"
),
samples4 %>%
dplyr::mutate(
type = "samples4"
),
samples5 %>%
dplyr::mutate(
type = "samples5"
),
samples6 %>%
dplyr::mutate(
type = "samples6"
)
) %>%
dplyr::mutate(
experimental_design =
dplyr::case_when(
sample_microhabitat == "hummock" ~ "1_1",
sample_microhabitat == "hollow" ~ "1_2"
)
) %>%
dplyr::select(dplyr::all_of(c(colnames(db_template_tables$samples), "type")))
samples_to_samples <-
samples %>%
dplyr::filter(! id_sample %in% id_sample_origin & ! type %in% paste0("samples", 4:6)) %>%
dplyr::mutate(
transition_description =
dplyr::case_when(
type == "samples2" ~ "relocate",
type == "samples3" ~ "wait",
TRUE ~ NA_character_
)
) %>%
dplyr::select(id_sample_parent, id_sample, transition_description) %>%
dplyr::rename(
id_sample_child = "id_sample"
)
d2_1 <-
samples2 %>%
dplyr::mutate(
id_measurement = seq_len(nrow(.)) + id_last$id_measurement,
attribute_name = "mass_absolute",
value_type = "mean",
error_type = "sd",
sample_size =
dplyr::case_when(
sample_type == "litter" ~ 10L,
sample_type == "paper" ~ 5L
),
reference_group = "site"
)
d2_2 <-
d2_1 %>%
dplyr::mutate(
id_measurement_numerator = id_measurement,
id_measurement_denominator = id_measurement,
id_measurement = seq_len(nrow(.)) + max(id_measurement),
attribute_name = "mass_relative_mass",
value = 1.0,
error = 0.0
)
d3_1 <-
samples3 %>%
dplyr::mutate(
id_measurement = seq_len(nrow(.)) + max(d2_2$id_measurement),
attribute_name = "mass_absolute",
value_type = "mean",
error_type = "sd",
sample_size =
dplyr::case_when(
sample_type == "litter" ~ 10L,
sample_type == "paper" ~ 5L
),
reference_group = "site"
)
d3_2 <-
d3_1 %>%
dplyr::mutate(
id_measurement_numerator = id_measurement,
id_measurement_denominator = d2_1$id_measurement,
id_measurement = seq_len(nrow(.)) + max(d3_1$id_measurement),
attribute_name = "mass_relative_mass",
value = (100 - c(9.7, 5.2, 40.1, 12.9))/100,
error = NA_real_
)
d4 <-
samples4 %>%
dplyr::rename(
value = "water_table_depth"
) %>%
dplyr::mutate(
id_measurement = seq_len(nrow(.)) + max(d3_2$id_measurement),
attribute_name = "water_table_depth",
value_type = "mean",
error_type = NA_character_,
reference_group = "site",
comments_measurement = "Data extracted from plot."
)
d5 <-
samples5 %>%
dplyr::rename(
value = "water_table_depth"
) %>%
dplyr::mutate(
id_measurement = seq_len(nrow(.)) + max(d4$id_measurement),
attribute_name = "water_table_depth",
value_type = "mean",
error_type = NA_character_,
reference_group = "site",
comments_measurement = "Data extracted from plot for hollows. The text says that hummocks were, on average, 35 cm higher than hollows. This distance was added to the water table depth values extracted for hollows."
)
d6 <-
samples6 %>%
dplyr::rename(
value = "temperature"
) %>%
dplyr::mutate(
id_measurement = seq_len(nrow(.)) + max(d5$id_measurement),
attribute_name = "temperature",
value_type = "mean",
error_type = NA_character_,
reference_group = "site",
comments_measurement = "Data extracted from plot."
)
# combine
d <-
dplyr::bind_rows(
db_template_tables$data,
d2_1,
d2_2,
d3_1,
d3_2,
d4,
d5,
d6
) %>%
dplyr::select(dplyr::all_of(colnames(db_template_tables$data)))
experimental_design_format <-
tibble::tibble(
id_dataset = id_last$id_dataset + 1L,
file = paste0(id_last$id_dataset + 1L, "/experimental_design_format.csv"),
experimental_design_description = "`site_name`: Name of the site. `sample_microhabitat`: Name of the microhabitat type."
)
# csv file to export
experimental_design_format2 <-
samples %>%
dplyr::filter(! is.na(experimental_design)) %>%
dplyr::filter(! duplicated(experimental_design)) %>%
dplyr::select(experimental_design, site_name, sample_microhabitat)
# export
write.csv(experimental_design_format2, paste0(dir_target, "/experimental_design_format.csv"), row.names = FALSE)
# list all tables
dm_insert_in <-
list(
datasets =
datasets %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$datasets))),
samples =
samples %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$samples))),
data =
d %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$data))),
samples_to_samples =
samples_to_samples %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$samples_to_samples))),
citations_to_datasets =
citations_to_datasets %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$citations_to_datasets))),
experimental_design_format =
experimental_design_format %>%
dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$experimental_design_format)))
)
# check whether all column names as present in table attributes
all_column_names <-
purrr::map(dm_insert_in, colnames) %>%
unlist() %>%
unique()
if(! all(all_column_names %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))) {
cond <- purrr::map_lgl(all_column_names, function(.x) ! .x %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))
RMariaDB::dbDisconnect(con)
stop(paste0("New `attribute_name`s discovered: ", paste(all_column_names[cond], collapse = ", ")))
}
all_data_attributes <- unique(dm_insert_in$data$attribute_name)
if(! all(all_data_attributes %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))) {
RMariaDB::dbDisconnect(con)
cond <- purrr::map_lgl(all_data_attributes, function(.x) ! .x %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))
stop(paste0("New `attribute_name`s discovered: ", paste(all_data_attributes[cond], collapse = ", ")))
}
# filter empty tables
dm_insert_in_check <-
dm_insert_in[purrr::map_lgl(dm_insert_in, function(x) nrow(x) > 0)] %>%
dm::as_dm() %>%
dp_dm_add_keys(dm_dpeatdecomposition)
# copy into dm_pmird
for(i in seq_along(dm_insert_in)) {
RMariaDB::dbAppendTable(con, name = names(dm_insert_in)[[i]], value = dm_insert_in[[i]])
}
RMariaDB::dbDisconnect(con)