# packages
library(metaDigitise)
library(magrittr)
library(tibble)
library(dplyr)
library(lubridate)
library(dpeatdecomposition)
library(dm)
library(RMariaDB)

1 Preparations

Connect to database

# connect to database
con <-
  RMariaDB::dbConnect(
    drv = RMariaDB::MariaDB(),
    dbname = "dpeatdecomposition",
    default.file = "~/my.cnf"
  )

# get database as dm object
dm_dpeatdecomposition <-
  dpeatdecomposition::dp_get_dm(con, learn_keys = TRUE)

Get most current IDs

id_last <- 
  list(
    id_dataset = 
      dm_dpeatdecomposition %>%
      dm::pull_tbl(datasets) %>%
      dplyr::pull(id_dataset) %>%
      tail(1),
    id_sample = 
      dm_dpeatdecomposition %>%
      dm::pull_tbl(samples) %>%
      dplyr::pull(id_sample) %>%
      tail(1),
    id_measurement = 
      dm_dpeatdecomposition %>%
      dm::pull_tbl(data) %>%
      dplyr::pull(id_measurement) %>%
      tail(1)
    ) %>%
  purrr::map(function(.x) {
    if(length(.x) == 0) {
      0L
    } else {
      .x
    }
  })

Create directories

dir_name <- "d12"
dir_source <- "../raw_data/data/d12"
dir_target <- paste0("../derived_data/", id_last$id_dataset + 1L)

if(!dir.exists(dir_target)) {
  dir.create(dir_target)
}

2 Data wrangling

2.1 dataset

datasets <- 
  tibble::tibble(
    id_dataset = id_last$id_dataset + 1L
  )

2.2 citations_to_datasets

citations_to_datasets <- 
  dplyr::bind_rows(
    db_template_tables$citations_to_datasets,
    tibble::tibble(
      id_dataset = datasets$id_dataset,
      id_citation = c("Farrish.1985")
    )
  )

2.3 samples

# original material
samples1 <- 
  tibble::tibble(
    site_name = "Marcell Experimental Forest",
    sampling_longitude = -93.459632,
    sampling_latitude = 47.519077,
    sample_depth_upper = 1,
    sample_depth_lower = 10,
    sampling_year = 1981,
    sampling_month = 10,
    sampling_day = NA_integer_,
    sample_type = rep(c("litter", "paper"), each = 2L),
    sample_type2 = sample_type, 
    sample_microhabitat = rep(c("hummock", "hollow"), 2L),
    treatment = "control",
    incubation_environment = NA_character_,
    incubation_duration = 0,
    is_incubated = 0L,
    comment_samples = "No exact location given in the paper, approximate location of the Marcell experimental forest. `sample_depth_upper`: Assumed based on description that living material was removed and that most of the sample are mosses."
  ) %>%
  dplyr::mutate(
    id_dataset = id_last$id_dataset + 1L,
    id_sample = id_last$id_sample + seq_along(id_dataset),
    id_sample_origin = id_sample,
    id_sample_parent = id_sample
  )

# insert within litterbags into the peat
samples2 <-
  samples1 %>%
  dplyr::mutate(
    id_sample = seq_len(nrow(.)) + max(id_sample),
    id_sample_origin = samples1$id_sample,
    id_sample_parent = id_sample_origin,
    id_sample_incubation_start = id_sample,
    sampling_month = 11L,
    sample_depth_upper = 2,
    sample_depth_lower = 2,
    is_incubated = 1L,
    comment_samples = "No exact location given in the paper, approximate location of the Marcell experimental forest. `sample_depth_upper`, `sample_depth_lower`: Assumed based on description that litterbags were placed just beneath the surface."
  )

# first time to recollect litterbags
samples3 <-
  samples2 %>%
  dplyr::mutate(
    id_sample_parent = id_sample,
    id_sample = seq_len(nrow(.)) + max(id_sample),
    sampling_year = 1982,
    sampling_month = 11L,
    sample_depth_upper = 2,
    sample_depth_lower = 2,
    incubation_duration = 365,
    is_incubated = 1L,
    comment_samples = "No exact location given in the paper, approximate location of the Marcell experimental forest. `sample_depth_upper`, `sample_depth_lower`: Assumed based on description that litterbags were placed just beneath the surface. `incubation_duration`: Originally given as one year."
  )

## additional masurements

# water table depths in hollow
samples4 <-
  readRDS(paste0(dir_source, "/raw/caldat/Farrish.1985-Fig1a"))$processed_data %>%
  dplyr::select(x, y) %>%
  dplyr::rename(
    water_table_depth = "y"
  ) %>%
  dplyr::mutate(
    site_name = "Marcell Experimental Forest",
    sampling_longitude = -93.459632,
    sampling_latitude = 47.519077,
    sample_depth_upper = 0,
    sample_depth_lower = 0,
    sampling_date = as.Date("1982-06-01") + lubridate::days(as.integer(x)),
    sampling_year = lubridate::year(sampling_date),
    sampling_month = lubridate::month(sampling_date),
    sampling_day = lubridate::day(sampling_date),
    sample_type = "peat",
    sample_type2 = sample_type, 
    sample_microhabitat = "hollow",
    treatment = "control",
    incubation_environment = NA_character_,
    incubation_duration = 0,
    is_incubated = 0L,
    comment_samples = "No exact location given in the paper, approximate location of the Marcell experimental forest. `sampling_year`, `sampling_month`, `sampling_day`: Extracted from plot."
  ) %>%
  dplyr::mutate(
    id_dataset = id_last$id_dataset + 1L,
    id_sample = max(samples3$id_sample) + seq_along(id_dataset),
    id_sample_origin = id_sample,
    id_sample_parent = id_sample
  )
  
# water table depths in hummock
  samples5 <-
    samples4 %>%
    dplyr::mutate(
      sample_microhabitat = "hummock",
      id_sample = max(samples4$id_sample) + seq_along(id_dataset),
      id_sample_origin = id_sample,
      id_sample_parent = id_sample,
      water_table_depth = water_table_depth + 35
    )
  
  # temperature
  samples6 <- 
    readRDS(paste0(dir_source, "/raw/caldat/Farrish.1985-Fig1b"))$processed_data %>%
    dplyr::select(id, x, y) %>%
    dplyr::rename(
      temperature = "y",
      sample_microhabitat = "id"
    ) %>%
  dplyr::mutate(
    temperature = temperature + 273.15,
    site_name = "Marcell Experimental Forest",
    sampling_longitude = -93.459632,
    sampling_latitude = 47.519077,
    sample_depth_upper = 10,
    sample_depth_lower = 10,
    sampling_date = as.Date("1982-06-01") + lubridate::days(as.integer(x)),
    sampling_year = lubridate::year(sampling_date),
    sampling_month = lubridate::month(sampling_date),
    sampling_day = lubridate::day(sampling_date),
    sample_type = "peat",
    sample_type2 = sample_type, 
    treatment = "control",
    incubation_environment = NA_character_,
    incubation_duration = 0,
    is_incubated = 0L,
    comment_samples = "No exact location given in the paper, approximate location of the Marcell experimental forest. `sampling_year`, `sampling_month`, `sampling_day`: Extracted from plot."
  ) %>%
  dplyr::mutate(
    id_dataset = id_last$id_dataset + 1L,
    id_sample = max(samples5$id_sample) + seq_along(id_dataset),
    id_sample_origin = id_sample,
    id_sample_parent = id_sample
  )
  

# combine
samples <- 
  dplyr::bind_rows(
    db_template_tables$samples,
    samples1 %>%
      dplyr::mutate(
        type = "samples1"
      ),
    samples2 %>%
      dplyr::mutate(
        type = "samples2"
      ),
    samples3 %>%
      dplyr::mutate(
        type = "samples3"
      ),
    samples4 %>%
      dplyr::mutate(
        type = "samples4"
      ),
    samples5 %>%
      dplyr::mutate(
        type = "samples5"
      ),
    samples6 %>%
      dplyr::mutate(
        type = "samples6"
      )
  ) %>%
  dplyr::mutate(
    experimental_design =
      dplyr::case_when(
        sample_microhabitat == "hummock" ~ "1_1",
        sample_microhabitat == "hollow" ~ "1_2"
      )
  ) %>%
  dplyr::select(dplyr::all_of(c(colnames(db_template_tables$samples), "type")))

2.4 samples_to_samples

samples_to_samples <- 
  samples %>%
  dplyr::filter(! id_sample %in% id_sample_origin & ! type %in% paste0("samples", 4:6)) %>%
  dplyr::mutate(
    transition_description =
      dplyr::case_when(
        type == "samples2" ~ "relocate",
        type == "samples3" ~ "wait",
        TRUE ~ NA_character_
      )
  ) %>%
  dplyr::select(id_sample_parent, id_sample, transition_description) %>%
  dplyr::rename(
    id_sample_child = "id_sample"
  )

2.5 data

d2_1 <- 
  samples2 %>%
  dplyr::mutate(
    id_measurement = seq_len(nrow(.)) + id_last$id_measurement,
    attribute_name = "mass_absolute",
    value_type = "mean",
    error_type = "sd",
    sample_size = 
      dplyr::case_when(
        sample_type == "litter" ~ 10L,
        sample_type == "paper" ~ 5L
      ),
    reference_group = "site"
  )

d2_2 <- 
  d2_1 %>%
  dplyr::mutate(
    id_measurement_numerator = id_measurement,
    id_measurement_denominator = id_measurement,
    id_measurement = seq_len(nrow(.)) + max(id_measurement),
    attribute_name = "mass_relative_mass",
    value = 1.0,
    error = 0.0
  )

d3_1 <- 
  samples3 %>%
  dplyr::mutate(
    id_measurement = seq_len(nrow(.)) + max(d2_2$id_measurement),
    attribute_name = "mass_absolute",
    value_type = "mean",
    error_type = "sd",
    sample_size = 
      dplyr::case_when(
        sample_type == "litter" ~ 10L,
        sample_type == "paper" ~ 5L
      ),
    reference_group = "site"
  )

d3_2 <- 
  d3_1 %>%
  dplyr::mutate(
    id_measurement_numerator = id_measurement,
    id_measurement_denominator = d2_1$id_measurement,
    id_measurement = seq_len(nrow(.)) + max(d3_1$id_measurement),
    attribute_name = "mass_relative_mass",
    value = (100 - c(9.7, 5.2, 40.1, 12.9))/100,
    error = NA_real_
  )

d4 <- 
  samples4 %>%
  dplyr::rename(
    value = "water_table_depth"
  ) %>%
  dplyr::mutate(
    id_measurement = seq_len(nrow(.)) + max(d3_2$id_measurement),
    attribute_name = "water_table_depth",
    value_type = "mean",
    error_type = NA_character_,
    reference_group = "site",
    comments_measurement = "Data extracted from plot."
  )

d5 <- 
  samples5 %>%
  dplyr::rename(
    value = "water_table_depth"
  ) %>%
  dplyr::mutate(
    id_measurement = seq_len(nrow(.)) + max(d4$id_measurement),
    attribute_name = "water_table_depth",
    value_type = "mean",
    error_type = NA_character_,
    reference_group = "site",
    comments_measurement = "Data extracted from plot for hollows. The text says that hummocks were, on average, 35 cm higher than hollows. This distance was added to the water table depth values extracted for hollows."
  )

d6 <- 
  samples6 %>%
  dplyr::rename(
    value = "temperature"
  ) %>%
  dplyr::mutate(
    id_measurement = seq_len(nrow(.)) + max(d5$id_measurement),
    attribute_name = "temperature",
    value_type = "mean",
    error_type = NA_character_,
    reference_group = "site",
    comments_measurement = "Data extracted from plot."
  )

# combine
d <- 
  dplyr::bind_rows(
    db_template_tables$data,
    d2_1,
    d2_2,
    d3_1,
    d3_2,
    d4,
    d5,
    d6
  ) %>%
  dplyr::select(dplyr::all_of(colnames(db_template_tables$data)))

2.6 experimental_design_format

experimental_design_format <- 
  tibble::tibble(
    id_dataset = id_last$id_dataset + 1L,
    file = paste0(id_last$id_dataset + 1L, "/experimental_design_format.csv"),
    experimental_design_description = "`site_name`: Name of the site. `sample_microhabitat`: Name of the microhabitat type."
  )

# csv file to export
experimental_design_format2 <- 
  samples %>%
  dplyr::filter(! is.na(experimental_design)) %>%
  dplyr::filter(! duplicated(experimental_design)) %>%
  dplyr::select(experimental_design, site_name, sample_microhabitat)

# export
write.csv(experimental_design_format2, paste0(dir_target, "/experimental_design_format.csv"), row.names = FALSE)

3 Export to database

# list all tables
dm_insert_in <-
  list(
    datasets = 
      datasets %>% 
      dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$datasets))),
    samples = 
      samples %>% 
      dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$samples))),
    data = 
      d %>% 
      dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$data))),
    samples_to_samples = 
      samples_to_samples %>% 
      dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$samples_to_samples))),
    citations_to_datasets = 
      citations_to_datasets %>% 
      dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$citations_to_datasets))),
    experimental_design_format = 
      experimental_design_format %>% 
      dplyr::select(dplyr::all_of(colnames(dm_dpeatdecomposition$experimental_design_format)))
  )

# check whether all column names as present in table attributes
all_column_names <- 
  purrr::map(dm_insert_in, colnames) %>%
  unlist() %>%
  unique()

if(! all(all_column_names %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))) {
  cond <- purrr::map_lgl(all_column_names, function(.x) ! .x %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))
  RMariaDB::dbDisconnect(con)
  stop(paste0("New `attribute_name`s discovered: ", paste(all_column_names[cond], collapse = ", ")))
}

all_data_attributes <- unique(dm_insert_in$data$attribute_name)

if(! all(all_data_attributes %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))) {
  RMariaDB::dbDisconnect(con)
  cond <- purrr::map_lgl(all_data_attributes, function(.x) ! .x %in% (dm_dpeatdecomposition %>% dm::pull_tbl(attributes) %>% dplyr::pull(attribute_name)))
  stop(paste0("New `attribute_name`s discovered: ", paste(all_data_attributes[cond], collapse = ", ")))
}


# filter empty tables
dm_insert_in_check <-
  dm_insert_in[purrr::map_lgl(dm_insert_in, function(x) nrow(x) > 0)] %>%
  dm::as_dm() %>%
  dp_dm_add_keys(dm_dpeatdecomposition)

# copy into dm_pmird
for(i in seq_along(dm_insert_in)) {
  RMariaDB::dbAppendTable(con, name = names(dm_insert_in)[[i]], value = dm_insert_in[[i]])
}

RMariaDB::dbDisconnect(con)

4 Notes