# packages
library(metaDigitise)
library(magrittr)
library(tibble)
library(dplyr)
library(lubridate)
library(dpeatdecomposition)
library(dm)
library(RMariaDB)

# 1 Preparations

## Connect to database

# Open the MariaDB connection to the `dpeatdecomposition` database. Client
# options are read from the option file `~/my.cnf`.
con <- RMariaDB::dbConnect(
  drv = RMariaDB::MariaDB(),
  dbname = "dpeatdecomposition",
  default.file = "~/my.cnf"
)

# Retrieve the database as a dm object (keys are learned from the database).
dm_dpeatdecomposition <-
  con %>%
  dpeatdecomposition::dp_get_dm(learn_keys = TRUE)

## Get most current IDs

# Largest IDs currently stored in the database for datasets, samples and
# measurements. All IDs created further below are numbered upwards from these
# values. A table without any rows yields `0L`, so numbering starts at 1.
#
# `max()` is used rather than the last element of the pulled column because
# the row order of a query result is not guaranteed without explicit ordering.
id_last <-
  list(
    id_dataset =
      dm_dpeatdecomposition %>%
      dm::pull_tbl(datasets) %>%
      dplyr::pull(id_dataset),
    id_sample =
      dm_dpeatdecomposition %>%
      dm::pull_tbl(samples) %>%
      dplyr::pull(id_sample),
    id_measurement =
      dm_dpeatdecomposition %>%
      dm::pull_tbl(data) %>%
      dplyr::pull(id_measurement)
  ) %>%
  purrr::map(function(.x) {
    if (length(.x) == 0) {
      0L
    } else {
      max(.x)
    }
  })

## Create directories

# Directories used by this script:
# - `dir_source`: raw and derived input files of this data source.
# - `dir_target`: output folder, named after the `id_dataset` the new dataset
#   will receive.
dir_name <- "d46"
dir_source <- "../raw_data/data/d46"
dir_target <- paste0("../derived_data/", id_last$id_dataset + 1L)

# `recursive = TRUE` also creates a missing parent folder. Without it,
# `dir.create()` only warns and the script would fail much later when the
# csv file is written into `dir_target`.
if (!dir.exists(dir_target)) {
  dir.create(dir_target, recursive = TRUE)
}

# 2 Data wrangling

## 2.1 dataset

# Row for table `datasets`: the new dataset receives the next free
# `id_dataset`.
datasets <- tibble::tibble(id_dataset = id_last$id_dataset + 1L)

## 2.2 citations_to_datasets

# Link the new dataset to its citation key; the row is appended to the
# template table so that the result carries the template's columns.
citations_to_datasets <-
  db_template_tables$citations_to_datasets %>%
  dplyr::bind_rows(
    tibble::tibble(
      id_dataset = datasets$id_dataset,
      id_citation = "Lieffers.1988"
    )
  )

## 2.3 samples

# mass remaining
# Reads the table derived from Tab. 2, harmonises the column names and derives
# the sample attributes. `type` marks rows with an incubation duration of 0
# as "samples2" and all other rows as "samples3".
samples2 <-
  paste0(dir_source, "/derived/Lieffers.1988-Tab2.ods") %>%
  readODS::read_ods() %>%
  dplyr::rename(
    sample_treatment = "treatment",
    comments_samples = "comment_samples",
    mesh_size_absolute = "mesh_size",
    mass_relative_mass = "mass_loss",
    mass_relative_mass_error = "mass_remaining_error",
    mass_relative_mass_error_type = "mass_remaining_error_type",
    mass_relative_mass_sample_size = "mass_remaining_sample_size"
  ) %>%
  dplyr::mutate(
    taxon_organ =
      dplyr::if_else(taxon_organ == "whole_plant", "whole plant", taxon_organ),
    id_dataset = datasets$id_dataset[[1]],
    is_incubated = TRUE,
    incubation_environment = "peat",
    mass_absolute = NA_real_,
    # the source column is a mass loss on a 0 to 100 scale; it is stored as
    # the remaining fraction, and the error is rescaled accordingly
    mass_relative_mass = (100 - mass_relative_mass) / 100,
    mass_relative_mass_error = mass_relative_mass_error / 100,
    # coordinates are degree/minute/second strings and become decimal numbers
    dplyr::across(
      c(sampling_longitude, sampling_latitude),
      function(.x) {
        as.numeric(sp::char2dms(.x, chd = "°", chm = "'", chs = "''"))
      }
    ),
    sampling_year = lubridate::year(sampling_date),
    sampling_month = lubridate::month(sampling_date),
    sampling_day = lubridate::day(sampling_date),
    # the input duration is passed through `dyears()` and expressed in days
    incubation_duration =
      lubridate::time_length(
        lubridate::dyears(incubation_duration),
        unit = "days"
      ),
    # one code per combination of site, treatment and distance to the ditch
    experimental_design =
      paste(
        as.numeric(as.factor(site_name)),
        as.numeric(as.factor(sample_treatment)),
        as.numeric(as.factor(distance_to_drainage_ditch)),
        sep = "_"
      ),
    type =
      dplyr::case_when(
        incubation_duration == 0.0 ~ "samples2",
        TRUE ~ "samples3"
      )
  )


# sample collection
# One row per unique combination of `taxon_rank_value` and `taxon_organ`
# (first occurrence in `samples2`). These rows are their own origin and
# parent, and all incubation and sampling-date information is blanked.
samples1 <-
  samples2 %>%
  dplyr::filter(
    !duplicated(paste(taxon_rank_value, taxon_organ, sep = "_"))
  ) %>%
  dplyr::mutate(
    id_sample = id_last$id_sample + dplyr::row_number(),
    id_sample_origin = id_sample,
    id_sample_parent = id_sample,
    id_sample_incubation_start = NA_integer_,
    # cellulose samples get no site and no coordinates
    site_name =
      dplyr::if_else(sample_type %in% "cellulose", NA_character_, site_name),
    site_label = site_name,
    sampling_longitude =
      dplyr::if_else(
        sample_type %in% "cellulose",
        NA_real_,
        sampling_longitude
      ),
    sampling_latitude =
      dplyr::if_else(
        sample_type %in% "cellulose",
        NA_real_,
        sampling_latitude
      ),
    sample_treatment = "control",
    experimental_design = NA_character_,
    is_incubated = FALSE,
    incubation_environment = NA_character_,
    # a depth range of 0 to 10 is set only where the taxon contains "Sphagnum"
    sample_depth_upper =
      dplyr::if_else(
        stringr::str_detect(taxon_rank_value, "Sphagnum"),
        0,
        NA_real_
      ),
    sample_depth_lower =
      dplyr::if_else(
        stringr::str_detect(taxon_rank_value, "Sphagnum"),
        10,
        NA_real_
      ),
    sampling_date = NA_real_,
    sampling_year = NA_real_,
    sampling_month = NA_real_,
    sampling_day = NA_real_
  )

# add missing IDs
# Assigns `id_sample` (continuing after the IDs of `samples1`) and links each
# incubated sample to its origin, its incubation start and its parent sample.
samples2 <- 
  samples2 %>%
  dplyr::mutate(
    id_sample = seq_len(nrow(.)) + max(samples1$id_sample),
    # origin: the matching collected sample in `samples1`.
    # NOTE(review): `samples1` was de-duplicated on `taxon_rank_value` +
    # `taxon_organ`, whereas this join uses `sample_type` + `taxon_rank_value`.
    # If a key matches several rows of `samples1`, the join returns more rows
    # than `samples2` has and `mutate()` fails - confirm the keys coincide.
    id_sample_origin = 
      dplyr::left_join(
        samples2 %>% dplyr::select(sample_type, taxon_rank_value),
        samples1 %>% dplyr::select(sample_type, taxon_rank_value, id_sample),
        by = c("sample_type", "taxon_rank_value")
      ) %>%
      dplyr::pull(id_sample),
    # incubation start: the row with the same sample key (type, taxon, site,
    # organ, upper depth) and the same experimental design whose incubation
    # duration is 0. `map_int()` requires exactly one such row per sample.
    id_sample_incubation_start = 
      purrr::map_int(seq_len(nrow(.)), function(i) {
        index <- paste0(sample_type, "_", taxon_rank_value, "_", site_name, "_", taxon_organ, "_", sample_depth_upper) == paste0(sample_type, "_", taxon_rank_value, "_", site_name, "_", taxon_organ, "_", sample_depth_upper)[[i]] & experimental_design == experimental_design[[i]] & incubation_duration == 0.0
        id_sample[index]
      }),
    # parent: among the rows with the same key and design and a shorter
    # incubation duration, the one with the longest duration; if there is no
    # such row, the origin sample is the parent.
    id_sample_parent = 
      purrr::map_int(seq_len(nrow(.)), function(i) {
        index <- paste0(sample_type, "_", taxon_rank_value, "_", site_name, "_", taxon_organ, "_", sample_depth_upper) == paste0(sample_type, "_", taxon_rank_value, "_", site_name, "_", taxon_organ, "_", sample_depth_upper)[[i]] & experimental_design == experimental_design[[i]] & incubation_duration < incubation_duration[[i]]
        if(! any(index)) {
          id_sample_origin[[i]]
        } else {
          target_incubation_duration <- max(incubation_duration[index])
          index <- index & incubation_duration == target_incubation_duration
          id_sample[index]
        }
      })
  ) %>%
  # recode `sample_type` only after the matching above, which still uses the
  # original values: everything except "cellulose" becomes "litter"
  dplyr::mutate(
    sample_type =
      dplyr::case_when(
        sample_type == "cellulose" ~ sample_type,
        TRUE ~ "litter"
      )
  )

# peat chemistry and physical properties
# Reads the table derived from Tab. 1. For every `*_relative_mass` column an
# all-NA `*_absolute` companion column is added, which the data table later
# references as numerator.
samples4 <-
  paste0(dir_source, "/derived/Lieffers.1988-Tab1.ods") %>%
  readODS::read_ods() %>%
  dplyr::mutate(
    id_dataset = datasets$id_dataset[[1]],
    id_sample = max(samples2$id_sample) + dplyr::row_number(),
    id_sample_origin = id_sample,
    id_sample_parent = id_sample,
    id_sample_incubation_start = NA_integer_,
    # coordinates are degree/minute/second strings and become decimal numbers
    dplyr::across(
      c(sampling_longitude, sampling_latitude),
      function(.x) {
        as.numeric(sp::char2dms(.x, chd = "°", chm = "'", chs = "''"))
      }
    ),
    sampling_year = lubridate::year(sampling_date),
    sampling_month = lubridate::month(sampling_date),
    sampling_day = lubridate::day(sampling_date),
    incubation_duration = 0.0,
    is_incubated = FALSE,
    experimental_design =
      paste(
        as.numeric(as.factor(site_name)),
        as.numeric(as.factor(sample_treatment)),
        as.numeric(as.factor(distance_to_drainage_ditch)),
        sep = "_"
      ),
    mass_absolute = NA_real_,
    # creates `<x>_relative_mass_absolute` columns filled with NA ...
    dplyr::across(
      dplyr::ends_with("_relative_mass"),
      function(.x) NA_real_,
      .names = "{.col}_absolute"
    )
  ) %>%
  # ... which are then shortened to `<x>_absolute`
  dplyr::rename_with(
    .fn = function(.x) {
      stringr::str_replace(
        .x,
        pattern = "_relative_mass_absolute$",
        replacement = "_absolute"
      )
    },
    .cols = dplyr::ends_with("_relative_mass_absolute", ignore.case = FALSE)
  )

# water table depth
# Digitised points of Fig. 1. Treatment and distance to the ditch are parsed
# from the point `id`; `x` is rounded and added as days to 1986-06-01 to get
# the sampling date. Site and coordinates are copied from `samples2`.
samples5 <-
  readRDS(paste0(dir_source, "/raw/caldat/Lieffers.1988-Fig1"))$processed_data %>%
  dplyr::select(id, x, water_table_depth = y) %>%
  dplyr::mutate(
    id_dataset = datasets$id_dataset[[1]],
    id_sample = max(samples4$id_sample) + dplyr::row_number(),
    id_sample_origin = id_sample,
    id_sample_parent = id_sample,
    id_sample_incubation_start = NA_integer_,
    sample_treatment =
      stringr::str_remove(
        stringr::str_extract(id, pattern = "^treatment(control|drained)"),
        "^treatment"
      ),
    distance_to_drainage_ditch =
      as.numeric(
        stringr::str_remove(
          stringr::str_extract(id, pattern = "distancetoditch\\d+\\.?\\d*$"),
          "^distancetoditch"
        )
      ),
    sampling_date =
      as.Date(as.Date("1986-06-01") + lubridate::ddays(round(x, 0))),
    sample_type = "peat",
    sample_depth_upper = 0,
    sample_depth_lower = 0,
    sampling_year = lubridate::year(sampling_date),
    sampling_month = lubridate::month(sampling_date),
    sampling_day = lubridate::day(sampling_date),
    incubation_duration = 0.0,
    is_incubated = FALSE,
    # these only work if `samples2` holds a single unique value each
    site_name = unique(samples2$site_name),
    sampling_longitude = unique(samples2$sampling_longitude),
    sampling_latitude = unique(samples2$sampling_latitude),
    comments_samples = paste0(unique(samples2$comments_samples), " `sampling_day` is only the approximate sampling day."),
    experimental_design =
      paste(
        as.numeric(as.factor(site_name)),
        as.numeric(as.factor(sample_treatment)),
        as.numeric(as.factor(distance_to_drainage_ditch)),
        sep = "_"
      )
  ) %>%
  dplyr::select(-c(x, id))

# temperature
# Digitised points of Fig. 3a and Fig. 3b. Treatment, distance to the ditch
# and depth are parsed from the point `id`; `x` is rounded and added as days
# to 1986-05-01 to get the sampling date.
samples6 <-
  list(
    readRDS(paste0(dir_source, "/raw/caldat/Lieffers.1988-Fig3a"))$processed_data,
    readRDS(paste0(dir_source, "/raw/caldat/Lieffers.1988-Fig3b"))$processed_data
  ) %>%
  dplyr::bind_rows() %>%
  dplyr::select(id, x, temperature = y) %>%
  dplyr::mutate(
    id_dataset = datasets$id_dataset[[1]],
    id_sample = max(samples5$id_sample) + dplyr::row_number(),
    id_sample_origin = id_sample,
    id_sample_parent = id_sample,
    id_sample_incubation_start = NA_integer_,
    sample_treatment =
      stringr::str_remove(
        stringr::str_extract(id, pattern = "^treatment(control|drained)"),
        "^treatment"
      ),
    distance_to_drainage_ditch =
      as.numeric(
        stringr::str_remove(
          stringr::str_extract(id, pattern = "distancetoditch\\d+\\.?\\d*"),
          "^distancetoditch"
        )
      ),
    sampling_date =
      as.Date(as.Date("1986-05-01") + lubridate::ddays(round(x, 0))),
    sample_type = "peat",
    # the measurement depth is used for both the upper and the lower boundary
    sample_depth_upper =
      as.numeric(
        stringr::str_remove(
          stringr::str_extract(id, pattern = "depth\\d+$"),
          "^depth"
        )
      ),
    sample_depth_lower = sample_depth_upper,
    sampling_year = lubridate::year(sampling_date),
    sampling_month = lubridate::month(sampling_date),
    sampling_day = lubridate::day(sampling_date),
    incubation_duration = 0.0,
    is_incubated = FALSE,
    # these only work if `samples2` holds a single unique value each
    site_name = unique(samples2$site_name),
    sampling_longitude = unique(samples2$sampling_longitude),
    sampling_latitude = unique(samples2$sampling_latitude),
    comments_samples = paste0(unique(samples2$comments_samples), " `sampling_day` is only the approximate sampling day."),
    experimental_design =
      paste(
        as.numeric(as.factor(site_name)),
        as.numeric(as.factor(sample_treatment)),
        as.numeric(as.factor(distance_to_drainage_ditch)),
        sep = "_"
      )
  ) %>%
  dplyr::select(-c(x, id))

# water content
# Digitised means and errors of Fig. 2. Treatment, distance to the ditch,
# depth and the sampling index are parsed from the `id`; the sampling index is
# matched to the digitised x axis positions to obtain the sampling date.
samples7 <-
  dplyr::bind_rows(
    readRDS(paste0(dir_source, "/raw/caldat/Lieffers.1988-Fig2"))$processed_data
  ) %>%
  dplyr::select(-c(n, variable)) %>%
  dplyr::rename(
    water_mass_relative_volume = "mean",
    water_mass_relative_volume_error = "error"
  ) %>%
  dplyr::mutate(
    id_dataset = datasets$id_dataset[[1]],
    id_sample = max(samples6$id_sample) + dplyr::row_number(),
    id_sample_origin = id_sample,
    id_sample_parent = id_sample,
    id_sample_incubation_start = NA_integer_,
    water_mass_relative_volume_error_type = "se",
    water_mass_relative_volume_sample_size = 3L,
    sample_treatment =
      stringr::str_remove(
        stringr::str_extract(id, pattern = "^treatment(control|drained)"),
        "^treatment"
      ),
    distance_to_drainage_ditch =
      as.numeric(
        stringr::str_remove(
          stringr::str_extract(id, pattern = "distancetoditch\\d+\\.?\\d*"),
          "^distancetoditch"
        )
      ),
    sample_type = "peat",
    # depth label taken from the id ...
    sample_depth_upper =
      as.numeric(
        stringr::str_remove(
          stringr::str_extract(id, pattern = "depth\\d+"),
          "^depth"
        )
      ),
    # ... mapped to a depth range: label 10 -> 7 to 12, label 30 -> 27 to 32;
    # any other label yields NA
    sample_depth_upper =
      dplyr::case_when(
        sample_depth_upper == 10 ~ 7,
        sample_depth_upper == 30 ~ 27
      ),
    sample_depth_lower =
      dplyr::case_when(
        sample_depth_upper == 7 ~ 12,
        sample_depth_upper == 27 ~ 32
      ),
    id_sampling =
      stringr::str_remove(
        stringr::str_extract(id, "sampling\\d*$"),
        "^sampling"
      )
  ) %>%
  dplyr::left_join(
    # sampling dates: the i-th digitised x axis position belongs to sampling i
    readRDS(paste0(dir_source, "/raw/caldat/Lieffers.1988-Fig2_xaxis"))$processed_data %>%
      dplyr::transmute(
        sampling_date =
          as.Date(as.Date("1986-07-01") + lubridate::ddays(round(x, 0))),
        id_sampling = as.character(dplyr::row_number())
      ),
    by = "id_sampling"
  ) %>%
  dplyr::select(-c(id, id_sampling)) %>%
  dplyr::mutate(
    sampling_year = lubridate::year(sampling_date),
    sampling_month = lubridate::month(sampling_date),
    sampling_day = lubridate::day(sampling_date),
    incubation_duration = 0.0,
    is_incubated = FALSE,
    # these only work if `samples2` holds a single unique value each
    site_name = unique(samples2$site_name),
    sampling_longitude = unique(samples2$sampling_longitude),
    sampling_latitude = unique(samples2$sampling_latitude),
    comments_samples = paste0(unique(samples2$comments_samples), " `sampling_day` is only the approximate sampling day."),
    experimental_design =
      paste(
        as.numeric(as.factor(site_name)),
        as.numeric(as.factor(sample_treatment)),
        as.numeric(as.factor(distance_to_drainage_ditch)),
        sep = "_"
      ),
    # all-NA placeholders referenced as numerator/denominator in the data table
    volume = NA_real_,
    water_mass_absolute = NA_real_
  )



## combine
# Stack all sample tables below the template table. `type` records which
# partial table a row comes from (`samples2` already carries its own `type`).
# `sampling_date` is dropped from `samples1`, where it was set to a numeric NA.
samples <-
  dplyr::bind_rows(
    db_template_tables$samples,
    dplyr::select(dplyr::mutate(samples1, type = "samples1"), -sampling_date),
    samples2,
    dplyr::mutate(samples4, type = "samples4"),
    dplyr::mutate(samples5, type = "samples5"),
    dplyr::mutate(samples6, type = "samples6"),
    dplyr::mutate(samples7, type = "samples7")
  )

## 2.4 samples_to_samples

# Parent-child links for all samples that are not themselves an origin sample.
# The transition is "translocate" for rows of type "samples2", "wait" for
# rows of type "samples3" and NA otherwise.
samples_to_samples <-
  samples %>%
  dplyr::filter(!(id_sample %in% id_sample_origin)) %>%
  dplyr::mutate(
    transition_description =
      dplyr::case_when(
        type == "samples2" ~ "translocate",
        type == "samples3" ~ "wait",
        TRUE ~ NA_character_
      )
  ) %>%
  dplyr::select(
    id_sample_parent,
    id_sample_child = id_sample,
    transition_description
  )

## 2.5 data

# mass remaining
# Long format of the litter bag samples: one measurement row per sample and
# attribute. `mass_absolute` is NA for all samples but still gets a row, so
# that each relative mass can reference a numerator and a denominator
# measurement.
d2 <- 
  samples2 %>%
  tidyr::pivot_longer(
    cols = dplyr::all_of(c("mass_absolute", "mass_relative_mass", "mesh_size_absolute")),
    names_to = "attribute_name",
    values_to = "value"
  ) %>%
  dplyr::mutate(
    # measurement IDs continue after the last ID in the database
    id_measurement = seq_len(nrow(.)) + id_last$id_measurement,
    # numerator of a relative mass: the `mass_absolute` measurement of the
    # same sample; NA for every other attribute
    id_measurement_numerator =
      purrr::map_int(seq_len(nrow(.)), function(i) {
        switch(
          attribute_name[[i]],
          "mass_relative_mass" = {
            id_measurement[id_sample == id_sample[[i]] & attribute_name == "mass_absolute"]
          },
          NA_integer_
        )
      }),
    # denominator of a relative mass: the `mass_absolute` measurement of the
    # sample at incubation start; NA for every other attribute
    id_measurement_denominator =
      purrr::map_int(seq_len(nrow(.)), function(i) {
        switch(
          attribute_name[[i]],
          "mass_relative_mass" = {
            id_measurement[id_sample == id_sample_incubation_start[[i]] & attribute_name == "mass_absolute"]
          },
          NA_integer_
        )
      }),
    # the mesh size is a single value, all other attributes are means
    value_type = 
      dplyr::case_when(
        attribute_name == "mesh_size_absolute" ~ "point", 
        TRUE ~ "mean"
      )
  )

# Lookup tables (one row per sample and attribute) holding the sample size,
# the error and the error type. The attribute name is obtained by stripping
# the respective suffix from the column name.
d2_sample_size <-
  samples2 %>%
  tidyr::pivot_longer(
    cols = dplyr::ends_with("_sample_size"),
    names_to = "attribute_name",
    values_to = "sample_size"
  ) %>%
  dplyr::transmute(
    id_sample,
    attribute_name = stringr::str_remove(attribute_name, pattern = "_sample_size$"),
    sample_size
  )

d2_error <-
  samples2 %>%
  tidyr::pivot_longer(
    cols = dplyr::ends_with(c("_error")),
    names_to = "attribute_name",
    values_to = "error"
  ) %>%
  dplyr::transmute(
    id_sample,
    attribute_name = stringr::str_remove(attribute_name, pattern = "_error$"),
    error
  )

d2_error_type <-
  samples2 %>%
  tidyr::pivot_longer(
    cols = dplyr::ends_with(c("_error_type")),
    names_to = "attribute_name",
    values_to = "error_type"
  ) %>%
  dplyr::transmute(
    id_sample,
    attribute_name = stringr::str_remove(attribute_name, pattern = "_error_type$"),
    error_type
  )

# Attach error, error type and sample size to the measurements. Each value is
# looked up by sample and attribute; attributes without an entry get NA.
d2 <-
  dplyr::mutate(
    d2,
    error =
      dplyr::pull(
        dplyr::left_join(d2, d2_error, by = c("id_sample", "attribute_name")),
        error
      ),
    error_type =
      dplyr::pull(
        dplyr::left_join(d2, d2_error_type, by = c("id_sample", "attribute_name")),
        error_type
      ),
    sample_size =
      dplyr::pull(
        dplyr::left_join(d2, d2_sample_size, by = c("id_sample", "attribute_name")),
        sample_size
      )
  )


# peat chemistry
# Long format of the peat chemistry samples: one measurement row per sample
# and attribute. Each `*_relative_mass` value references the matching
# `*_absolute` measurement as numerator and the sample's `mass_absolute`
# measurement as denominator.
d4 <- 
  samples4 %>%
  tidyr::pivot_longer(
    cols = dplyr::all_of(c("mass_absolute", "N_relative_mass", "P_relative_mass", "K_relative_mass", "Ca_relative_mass", "Mg_relative_mass", "C_relative_mass", "ash_mass_relative_mass", "N_absolute", "P_absolute", "K_absolute", "Ca_absolute", "Mg_absolute", "C_absolute", "ash_mass_absolute")),
    names_to = "attribute_name",
    values_to = "value"
  ) %>%
  dplyr::mutate(
    # measurement IDs continue after those of `d2`
    id_measurement = seq_len(nrow(.)) + max(d2$id_measurement),
    id_measurement_numerator =
      purrr::map_int(seq_len(nrow(.)), function(i) {
        # this first branch cannot match here: "mass_relative_mass" is not
        # among the pivoted columns
        if(attribute_name[[i]] == "mass_relative_mass") {
          id_measurement[id_sample == id_sample[[i]] & attribute_name == "mass_absolute"]
        } else if (stringr::str_detect(attribute_name[[i]], pattern = "_relative_mass$")) {
          # e.g. "N_relative_mass" -> measurement "N_absolute" of the same sample
          id_measurement[id_sample == id_sample[[i]] & attribute_name == stringr::str_replace(attribute_name[[i]], "_relative_mass2?", "_absolute")]
        } else {
          NA_integer_
        }
      }),
    id_measurement_denominator =
      purrr::map_int(seq_len(nrow(.)), function(i) {
        # this first branch cannot match here (see numerator above)
        if(attribute_name[[i]] == "mass_relative_mass") {
          id_measurement[id_sample == id_sample_incubation_start[[i]] & attribute_name == "mass_absolute"]
        } else if (stringr::str_detect(attribute_name[[i]], pattern = "_relative_mass$")) {
          # relative masses refer to the `mass_absolute` of the same sample
          id_measurement[id_sample == id_sample[[i]] & attribute_name == "mass_absolute"]
        } else {
          NA_integer_
        }
      }),
    value_type = "mean"
  )


# water table depth
# Long format of the water table depth samples: one measurement row per
# sample. The only attribute is `water_table_depth`, which is not a ratio, so
# no numerator or denominator measurement exists. The per-row lookups that
# used to be here could never match and always produced NA.
d5 <-
  samples5 %>%
  tidyr::pivot_longer(
    cols = dplyr::all_of(c("water_table_depth")),
    names_to = "attribute_name",
    values_to = "value"
  ) %>%
  dplyr::mutate(
    # measurement IDs continue after those of `d4`
    id_measurement = seq_len(nrow(.)) + max(d4$id_measurement),
    id_measurement_numerator = NA_integer_,
    id_measurement_denominator = NA_integer_,
    value_type = "mean"
  )

# temperature
# Long format of the temperature samples: one measurement row per sample.
# 273.15 is added to the digitised values before they are stored. The only
# attribute is `temperature`, which is not a ratio, so no numerator or
# denominator measurement exists. The per-row lookups that used to be here
# could never match and always produced NA.
d6 <-
  samples6 %>%
  dplyr::mutate(
    temperature = temperature + 273.15
  ) %>%
  tidyr::pivot_longer(
    cols = dplyr::all_of(c("temperature")),
    names_to = "attribute_name",
    values_to = "value"
  ) %>%
  dplyr::mutate(
    # measurement IDs continue after those of `d5`
    id_measurement = seq_len(nrow(.)) + max(d5$id_measurement),
    id_measurement_numerator = NA_integer_,
    id_measurement_denominator = NA_integer_,
    value_type = "mean"
  )


# water content
# Long format of the water content samples. `water_mass_relative_volume`
# references the `water_mass_absolute` measurement of the same sample as
# numerator and its `volume` measurement as denominator; all other attributes
# have neither.
d7 <-
  samples7 %>%
  tidyr::pivot_longer(
    cols = dplyr::all_of(c("water_mass_absolute", "volume", "water_mass_relative_volume")),
    names_to = "attribute_name",
    values_to = "value"
  ) %>%
  dplyr::mutate(
    # measurement IDs continue after those of `d6`
    id_measurement = max(d6$id_measurement) + seq_len(nrow(.)),
    id_measurement_numerator =
      purrr::map_int(seq_len(nrow(.)), function(i) {
        switch(
          attribute_name[[i]],
          "water_mass_relative_volume" = {
            id_measurement[id_sample == id_sample[[i]] & attribute_name == "water_mass_absolute"]
          },
          NA_integer_
        )
      }),
    id_measurement_denominator =
      purrr::map_int(seq_len(nrow(.)), function(i) {
        switch(
          attribute_name[[i]],
          "water_mass_relative_volume" = {
            id_measurement[id_sample == id_sample[[i]] & attribute_name == "volume"]
          },
          NA_integer_
        )
      }),
    value_type = "mean"
  )

# Lookup tables (one row per sample and attribute) holding the sample size,
# the error and the error type. The attribute name is obtained by stripping
# the respective suffix from the column name.
d7_sample_size <-
  samples7 %>%
  tidyr::pivot_longer(
    cols = dplyr::ends_with("_sample_size"),
    names_to = "attribute_name",
    values_to = "sample_size"
  ) %>%
  dplyr::transmute(
    id_sample,
    attribute_name = stringr::str_remove(attribute_name, pattern = "_sample_size$"),
    sample_size
  )

d7_error <-
  samples7 %>%
  tidyr::pivot_longer(
    cols = dplyr::ends_with(c("_error")),
    names_to = "attribute_name",
    values_to = "error"
  ) %>%
  dplyr::transmute(
    id_sample,
    attribute_name = stringr::str_remove(attribute_name, pattern = "_error$"),
    error
  )

d7_error_type <-
  samples7 %>%
  tidyr::pivot_longer(
    cols = dplyr::ends_with(c("_error_type")),
    names_to = "attribute_name",
    values_to = "error_type"
  ) %>%
  dplyr::transmute(
    id_sample,
    attribute_name = stringr::str_remove(attribute_name, pattern = "_error_type$"),
    error_type
  )

# Attach error, error type and sample size to the measurements. Each value is
# looked up by sample and attribute; attributes without an entry get NA.
d7 <-
  dplyr::mutate(
    d7,
    error =
      dplyr::pull(
        dplyr::left_join(d7, d7_error, by = c("id_sample", "attribute_name")),
        error
      ),
    error_type =
      dplyr::pull(
        dplyr::left_join(d7, d7_error_type, by = c("id_sample", "attribute_name")),
        error_type
      ),
    sample_size =
      dplyr::pull(
        dplyr::left_join(d7, d7_sample_size, by = c("id_sample", "attribute_name")),
        sample_size
      )
  )

# combine
# Stack all measurement tables below the template table and keep only the
# columns of the template, in the template's column order.
d <-
  list(db_template_tables$data, d2, d4, d5, d6, d7) %>%
  dplyr::bind_rows() %>%
  dplyr::select(dplyr::all_of(colnames(db_template_tables$data)))

## 2.6 experimental_design_format

# Row for table `experimental_design_format`: path of the exported csv file
# (relative to the dataset folder) and a description of its columns.
# NOTE(review): the description below calls `sample_treatment` a logical
# value, but this script stores the strings "control"/"drained" in that
# column - confirm which wording is intended.
experimental_design_format <-
  tibble::tibble(
    id_dataset = datasets$id_dataset,
    file =
      paste(
        id_last$id_dataset + 1L,
        "experimental_design_format.csv",
        sep = "/"
      ),
    experimental_design_description = "`site_name`: Name of the site. `sample_treatment`: Logical value indicating whether the plot has been drained (`TRUE`) or not (`FALSE`). `distance_to_drainage_ditch`: Numerical value representing the distance to the next drainage ditch [m]."
  )

# csv file to export
# One row per distinct, non-missing `experimental_design` code (first
# occurrence in `samples`) together with the variables the code encodes.
experimental_design_format2 <-
  samples %>%
  dplyr::filter(
    !(duplicated(experimental_design) | is.na(experimental_design))
  ) %>%
  dplyr::select(
    experimental_design,
    site_name,
    sample_treatment,
    distance_to_drainage_ditch
  )

# export
write.csv(
  experimental_design_format2,
  file = paste0(dir_target, "/experimental_design_format.csv"),
  row.names = FALSE
)

# 3 Export to database

# list all tables
# Each table is reduced to exactly the columns of the corresponding database
# table, in the database's column order. `all_of()` raises an error if a
# required column is missing.
dm_insert_in <-
  list(
    datasets =
      dplyr::select(
        datasets,
        dplyr::all_of(colnames(dm_dpeatdecomposition$datasets))
      ),
    samples =
      dplyr::select(
        samples,
        dplyr::all_of(colnames(dm_dpeatdecomposition$samples))
      ),
    data =
      dplyr::select(
        d,
        dplyr::all_of(colnames(dm_dpeatdecomposition$data))
      ),
    samples_to_samples =
      dplyr::select(
        samples_to_samples,
        dplyr::all_of(colnames(dm_dpeatdecomposition$samples_to_samples))
      ),
    citations_to_datasets =
      dplyr::select(
        citations_to_datasets,
        dplyr::all_of(colnames(dm_dpeatdecomposition$citations_to_datasets))
      ),
    experimental_design_format =
      dplyr::select(
        experimental_design_format,
        dplyr::all_of(colnames(dm_dpeatdecomposition$experimental_design_format))
      )
  )

# check whether all column names are present in table attributes
all_column_names <-
  purrr::map(dm_insert_in, colnames) %>%
  unlist() %>%
  unique()

# Column names that have no entry in `attributes$attribute_name`. The
# attribute names are fetched from the database once, instead of once per
# checked column name.
unknown_column_names <-
  setdiff(
    all_column_names,
    dm_dpeatdecomposition %>%
      dm::pull_tbl(attributes) %>%
      dplyr::pull(attribute_name)
  )

# Abort before anything is written; the connection is closed first because
# `stop()` ends the script.
if (length(unknown_column_names) > 0) {
  RMariaDB::dbDisconnect(con)
  stop(paste0("New `attribute_name`s discovered: ", paste(unknown_column_names, collapse = ", ")))
}

# check whether all attribute names used in table `data` are present in table
# attributes
all_data_attributes <- unique(dm_insert_in$data$attribute_name)

# Attribute names that have no entry in `attributes$attribute_name` (fetched
# from the database once).
unknown_data_attributes <-
  setdiff(
    all_data_attributes,
    dm_dpeatdecomposition %>%
      dm::pull_tbl(attributes) %>%
      dplyr::pull(attribute_name)
  )

# The connection must be closed BEFORE `stop()`: nothing after `stop()` is
# executed, so disconnecting afterwards would leave the connection open.
if (length(unknown_data_attributes) > 0) {
  RMariaDB::dbDisconnect(con)
  stop(paste0("New `attribute_name`s discovered: ", paste(unknown_data_attributes, collapse = ", ")))
}


# filter empty tables
# Keeps only tables with at least one row, converts them to a dm object and
# adds the keys of the database dm via `dp_dm_add_keys()`.
dm_insert_in_check <-
  dm_insert_in %>%
  purrr::keep(function(x) nrow(x) > 0) %>%
  dm::as_dm() %>%
  dp_dm_add_keys(dm_dpeatdecomposition)

# copy into the database
# Appends every prepared table to the database table of the same name, in the
# order in which the tables are listed in `dm_insert_in`. The connection is
# closed in `finally`, so it is released even if an insert fails; the error
# itself is still raised.
invisible(
  tryCatch(
    {
      for (i in seq_along(dm_insert_in)) {
        RMariaDB::dbAppendTable(
          con,
          name = names(dm_insert_in)[[i]],
          value = dm_insert_in[[i]]
        )
      }
    },
    finally = RMariaDB::dbDisconnect(con)
  )
)

# 4 Notes