# packages
library(metaDigitise)
library(magrittr)
library(tibble)
library(dplyr)
library(lubridate)
library(dpeatdecomposition)
library(dm)
library(RMariaDB)

1 Preparations

Connect to database

# open the MariaDB connection to the "dpeatdecomposition" database; client
# options are taken from the option file given as `default.file`
con <- RMariaDB::dbConnect(
  drv = RMariaDB::MariaDB(),
  dbname = "dpeatdecomposition",
  default.file = "~/my.cnf"
)

# represent the database as a dm object (keys are learned from the database)
dm_dpeatdecomposition <- con %>%
  dpeatdecomposition::dp_get_dm(learn_keys = TRUE)

Get most current IDs

# Highest id currently stored in the `datasets`, `samples` and `data` tables.
# New rows created below are numbered starting from these values.
# `max()` is used instead of taking the last pulled element because the row
# order returned by the database is not guaranteed to follow the id.
# An empty table yields 0L, so the first new id becomes 1.
id_last <-
  list(
    id_dataset =
      dm_dpeatdecomposition %>%
      dm::pull_tbl(datasets) %>%
      dplyr::pull(id_dataset),
    id_sample =
      dm_dpeatdecomposition %>%
      dm::pull_tbl(samples) %>%
      dplyr::pull(id_sample),
    id_measurement =
      dm_dpeatdecomposition %>%
      dm::pull_tbl(data) %>%
      dplyr::pull(id_measurement)
  ) %>%
  purrr::map(function(.x) {
    if (length(.x) == 0) {
      0L
    } else {
      max(.x)
    }
  })

Create directories

# folder name of this dataset, location of its raw data, and the folder for
# derived data (named after the id the new dataset will receive)
dir_name <- "d26"
dir_source <- "../raw_data/data/d26"
dir_target <- paste0("../derived_data/", id_last$id_dataset + 1L)

# create the target folder if needed; `recursive = TRUE` also creates a
# missing parent folder instead of failing with a warning
if (!dir.exists(dir_target)) {
  dir.create(dir_target, recursive = TRUE)
}

2 Data wrangling

2.1 dataset

# single-row table holding the id of the dataset added by this script
datasets <- tibble::tibble(id_dataset = id_last$id_dataset + 1L)

2.2 citations_to_datasets

# link the new dataset to its citation key, starting from the template table
citations_to_datasets <-
  list(
    db_template_tables$citations_to_datasets,
    tibble::tibble(
      id_dataset = datasets$id_dataset,
      id_citation = "Limpens.2003"
    )
  ) %>%
  dplyr::bind_rows()

2.3 samples

### mass remaining

## experiment 1

# mass remaining
# Experiment 1: one row per digitised data point of Fig. 1, describing the
# sample retrieved after the incubation. Later columns in this `mutate()`
# reference earlier ones (e.g. `sampling_date` uses `incubation_duration`),
# so the statement order matters.
# NOTE(review): the columns `id`, `mean`, `error`, `n`, `variable` of
# `$processed_data` look like metaDigitise output -- confirm.
samples1_3 <- 
  readRDS(paste0(dir_source, "/raw/caldat/Limpens.2003-Fig1"))$processed_data %>%
  dplyr::mutate(
    id_dataset = datasets$id_dataset[[1]],
    incubation_environment = "peat",
    is_incubated = TRUE,
    id_experiment = 1,
    mass_absolute = NA_real_,
    # NOTE(review): presumably `mean` is percent mass loss, converted here to
    # the remaining mass fraction -- confirm against the figure
    mass_relative_mass = (100 - mean)/100,
    mass_relative_mass_error = error/100,
    mass_relative_mass_error_type = "se",
    mass_relative_mass_sample_size = 10L,
    mesh_size_absolute = 74/1000,
    # incubation duration in days (it is added to the start date via `ddays()`)
    incubation_duration = 365,
    sampling_date =
      as.Date("1999-01-01") + lubridate::ddays(incubation_duration),
    sampling_year = lubridate::year(sampling_date),
    sampling_month = lubridate::month(sampling_date),
    sampling_day = NA_real_,
    taxon_rank_value = "Sphagnum magellanicum",
    taxon_rank_name = "species",
    taxon_organ = "stems",
    sample_treatment = "transplanted",
    # treatment of the source material, decoded from the digitised group id
    sample_treatment2 = 
      dplyr::case_when(
        stringr::str_detect(id, "control") ~ "control",
        stringr::str_detect(id, "co2") ~ "CO2_fertilization",
        stringr::str_detect(id, "N") ~ "nitrogen_fertilization",
      ),
    # stem age class, decoded from the digitised group id ("stemy" / "stemo")
    sample_type = 
      dplyr::case_when(
        stringr::str_detect(id, "stemy") ~ "vegetation",
        stringr::str_detect(id, "stemo") ~ "litter",
      ),
    sample_type2 = 
      dplyr::case_when(
        stringr::str_detect(id, "stemy") ~ "Still red Sphagnum magellanicum stem (1-3 cm from capitulum)",
        stringr::str_detect(id, "stemo") ~ "Brown Sphagnum magellanicum stem (5-7 cm from capitulum)",
      ),
    sample_depth_upper = 10,
    sample_depth_lower = 15,
    site_label = "Reigersplas",
    sample_microhabitat = "low hummock",
    # degree/minute strings converted to decimal degrees
    sampling_longitude =
      "6°27'E" %>%
      sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
      as.numeric(),
    sampling_latitude =
      "52°50'N" %>%
      sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
      as.numeric(),
    # key "<experiment>_<treatment index>", used later to match rows that
    # belong to the same treatment group
    experimental_design =
      paste0(
        1, "_", #---note: id experiment
        as.numeric(as.factor(sample_treatment2))
      ),
    comments_samples = "Coordinates denote the approximate location of the site, but not the exact sampling location."
  ) %>%
  # drop the raw digitisation columns that were converted above
  dplyr::select(-mean, -error, -n, -variable)

# initial mass
# Experiment 1: the same samples at the start of the incubation
# (incubation_duration = 0): the full mass remains and there is no error.
# `incubation_duration` is in days, so it is added with `ddays()` as in the
# mass-remaining block, and `sampling_day` uses `NA_real_` like the other
# sample tables, which keeps the column type stable.
samples1_2 <-
  samples1_3 %>%
  dplyr::mutate(
    mass_relative_mass = 1,
    mass_relative_mass_error = 0,
    incubation_duration = 0,
    sampling_date =
      as.Date("1999-01-01") + lubridate::ddays(incubation_duration),
    sampling_year = lubridate::year(sampling_date),
    sampling_month = lubridate::month(sampling_date),
    sampling_day = NA_real_
  )

# sample collection
# Experiment 1: rows for the material as originally collected (before
# incubation), derived from the initial-mass rows. These rows are their own
# origin and parent and have no incubation start.
samples1_1 <- 
  samples1_2 %>%
  dplyr::mutate(
    # new ids continue after the last sample id found in the database
    id_sample = seq_len(nrow(.)) + id_last$id_sample,
    id_sample_origin = id_sample,
    id_sample_parent = id_sample,
    id_sample_incubation_start = NA_integer_,
    sample_treatment = sample_treatment2,
    is_incubated = FALSE,
    incubation_environment = NA_character_,
    site_label = NA_character_,
    site_name = NA_character_,
    sampling_longitude = NA_real_,
    sampling_latitude = NA_real_,
    sampling_date = as.Date("1998-09-15"),
    sampling_year = lubridate::year(sampling_date),
    sampling_month = lubridate::month(sampling_date),
    sampling_day = NA_real_,
    # depth range by stem age class; the values match the "1-3 cm" / "5-7 cm
    # from capitulum" ranges given in `sample_type2`
    sample_depth_upper =
      dplyr::case_when(
        stringr::str_detect(id, "stemy") ~ 1,
        stringr::str_detect(id, "stemo") ~ 5,
      ),
    sample_depth_lower =
      dplyr::case_when(
        stringr::str_detect(id, "stemy") ~ 3,
        stringr::str_detect(id, "stemo") ~ 7,
      ),
    comments_samples = "Samples are from a 3-year outdoor mesocosms with either no additional manipulation (control), N fertilization (50 kg ha$^{-1}$ yr$^{-1}$), or CO$_2$ fertilization (open top chamber, 560 ppmv)."
  )

# add missing ids
# Stack the initial-mass rows and the mass-remaining rows of experiment 1;
# `type` records which table each row came from.
samples1_2 <- 
  dplyr::bind_rows(
    samples1_2 %>%
      dplyr::mutate(
        type = "samples1_2"
      ), 
    samples1_3 %>%
      dplyr::mutate(
        type = "samples1_3"
      )
  )

# Assign sample ids and link every row to its origin, incubation-start and
# parent sample.
samples1_2 <- 
  samples1_2 %>%
  dplyr::mutate(
    # ids continue after the collection samples
    id_sample = seq_len(nrow(.)) + max(samples1_1$id_sample),
    # origin: the collection sample with the same experimental design and
    # sample type (the left join keeps the row order of the stacked table)
    id_sample_origin = 
      dplyr::left_join(
        samples1_2 %>% dplyr::select(experimental_design, sample_type),
        samples1_1 %>% dplyr::select(experimental_design, sample_type, id_sample),
        by = c("experimental_design", "sample_type")
      ) %>%
      dplyr::pull(id_sample),
    # incubation start: the row of the same taxon, sample type and design
    # with incubation_duration 0; `map_int()` requires exactly one match
    id_sample_incubation_start = 
      purrr::map_int(seq_len(nrow(.)), function(i) {
        index <- paste0(taxon_rank_value, "_", sample_type) == paste0(taxon_rank_value, "_", sample_type)[[i]] & experimental_design == experimental_design[[i]] & incubation_duration == 0.0
        id_sample[index]
      }),
    # parent: the row of the same group with the longest shorter incubation
    # duration; rows without such a predecessor point to their origin sample
    id_sample_parent = 
      purrr::map_int(seq_len(nrow(.)), function(i) {
       index <- paste0(taxon_rank_value, "_", sample_type) == paste0(taxon_rank_value, "_", sample_type)[[i]] & experimental_design == experimental_design[[i]] & incubation_duration < incubation_duration[[i]]
        if(! any(index)) {
          id_sample_origin[[i]]
        } else {
          target_incubation_duration <- max(incubation_duration[index])
          index <- index & incubation_duration == target_incubation_duration
          id_sample[index]
        }
      })
  ) %>%
  # after the matching above, all incubated rows are labelled as litter
  dplyr::mutate(
    sample_type = "litter"
  )



## experiment 2

# mass remaining
# Experiment 2: one row per digitised data point of Fig. 2, describing the
# sample retrieved after the incubation. Later columns reference earlier
# ones, so the statement order matters.
samples2_3 <- 
  readRDS(paste0(dir_source, "/raw/caldat/Limpens.2003-Fig2"))$processed_data %>%
  dplyr::mutate(
    id_dataset = datasets$id_dataset[[1]],
    incubation_environment = "peat",
    is_incubated = TRUE,
    id_experiment = 2,
    mass_absolute = NA_real_,
    # NOTE(review): presumably `mean` is percent mass loss, converted here to
    # the remaining mass fraction -- confirm against the figure
    mass_relative_mass = (100 - mean)/100,
    mass_relative_mass_error = error/100,
    mass_relative_mass_error_type = "se",
    mass_relative_mass_sample_size = 10L,
    mesh_size_absolute = 74/1000,
    # incubation duration in days (it is added to the start date via `ddays()`)
    incubation_duration = 365,
    sampling_date =
      as.Date("1999-05-01") + lubridate::ddays(incubation_duration),
    sampling_year = lubridate::year(sampling_date),
    sampling_month = lubridate::month(sampling_date),
    sampling_day = NA_real_,
    # species decoded from the digitised group id
    taxon_rank_value = 
      dplyr::case_when(
        stringr::str_detect(id, "whatSphagnummagellanicum") ~ "Sphagnum magellanicum",
        stringr::str_detect(id, "whatSphagnumpapillosum") ~ "Sphagnum papillosum",
        stringr::str_detect(id, "whatSphagnumcuspidatum") ~ "Sphagnum cuspidatum",
        stringr::str_detect(id, "whatSphagnumfallax") ~ "Sphagnum fallax"
      ),
    taxon_rank_name = "species",
    sample_treatment = "transplanted",
    sample_type = 
      dplyr::case_when(
        stringr::str_detect(id, "stemy") ~ "vegetation",
        stringr::str_detect(id, "stemo") ~ "litter",
      ),
    # NOTE(review): the descriptions name Sphagnum magellanicum for every row,
    # although `taxon_rank_value` distinguishes four species -- confirm
    sample_type2 = 
      dplyr::case_when(
        stringr::str_detect(id, "stemy") ~ "Still red Sphagnum magellanicum stem (1-3 cm from capitulum)",
        stringr::str_detect(id, "stemo") ~ "Brown Sphagnum magellanicum stem (5-7 cm from capitulum)",
      ),
    taxon_organ = "stems",
    sample_depth_upper = 10,
    sample_depth_lower = 15,
    # NOTE(review): the comment text below spells the site "Bargerveen";
    # check whether "Bargveen" is a typo
    site_label = "Bargveen",
    sample_microhabitat = "low_hummock and hollow-lawn",
    # degree/minute strings converted to decimal degrees
    sampling_longitude =
      "7°03'E" %>%
      sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
      as.numeric(),
    sampling_latitude =
      "52°42'N" %>%
      sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
      as.numeric(),
    # `site_label` is constant here, so this key is the same for all rows
    experimental_design =
      paste0(
        2, "_", #---note: id experiment
        as.numeric(as.factor(site_label))
      ),
    comments_samples = "Samples were incubated under natural conditions in the field in Bargerveen, for each species-stem age combination 10 litterbags in low hummock and 10 in hollow-lawn plots. Coordinates are approximate coordinates for this incubation site, but not exact sampling locations."
  ) %>%
  # drop the raw digitisation columns that were converted above
  dplyr::select(-mean, -error, -n, -variable)

# initial mass
# Experiment 2: the same samples at the start of the incubation
# (incubation_duration = 0): the full mass remains and there is no error.
# `incubation_duration` is in days, so it is added with `ddays()` as in the
# mass-remaining block.
# NOTE(review): the start date used here (1998-09-01) differs from the start
# date used for the mass-remaining rows (1999-05-01) and precedes the
# collection date (1998-09-15) -- confirm which date is intended.
samples2_2 <-
  samples2_3 %>%
  dplyr::mutate(
    mass_relative_mass = 1,
    mass_relative_mass_error = 0,
    incubation_duration = 0,
    sampling_date =
      as.Date("1998-09-01") + lubridate::ddays(incubation_duration),
    sampling_year = lubridate::year(sampling_date),
    sampling_month = lubridate::month(sampling_date),
    sampling_day = NA_real_
  )

# sample collection
# Experiment 2: rows for the material as originally collected (before
# incubation), derived from the initial-mass rows. These rows are their own
# origin and parent and have no incubation start.
samples2_1 <- 
  samples2_2 %>%
  dplyr::mutate(
    # ids continue after the last sample id of experiment 1
    id_sample = seq_len(nrow(.)) + max(samples1_2$id_sample),
    id_sample_origin = id_sample,
    id_sample_parent = id_sample,
    id_sample_incubation_start = NA_integer_,
    sample_treatment = "control",
    is_incubated = FALSE,
    incubation_environment = NA_character_,
    site_label = "Clara Bog",
    # NOTE(review): Clara Bog is decoded from `siteoriginIRE` ids elsewhere in
    # this file; an eastern longitude looks suspicious for that site --
    # confirm E vs. W
    sampling_longitude =
      "7°36'E" %>%
      sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
      as.numeric(),
    sampling_latitude =
      "53°19'N" %>%
      sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
      as.numeric(),
    sample_microhabitat = NA_character_,
    sampling_date = as.Date("1998-09-15"),
    sampling_year = lubridate::year(sampling_date),
    sampling_month = lubridate::month(sampling_date),
    sampling_day = NA_real_,
    # depth range by stem age class; the values match the "1-3 cm" / "5-7 cm
    # from capitulum" ranges given in `sample_type2`
    sample_depth_upper =
      dplyr::case_when(
        stringr::str_detect(id, "stemy") ~ 1,
        stringr::str_detect(id, "stemo") ~ 5,
      ),
    sample_depth_lower =
      dplyr::case_when(
        stringr::str_detect(id, "stemy") ~ 3,
        stringr::str_detect(id, "stemo") ~ 7,
      ),
    comments_samples = "Coordinates are approximate coordinates for this incubation site, but not exact sampling locations."
  )

# add missing ids
# Stack the initial-mass rows and the mass-remaining rows of experiment 2;
# `type` records which table each row came from.
samples2_2 <- 
  dplyr::bind_rows(
    samples2_2 %>%
      dplyr::mutate(
        type = "samples2_2"
      ), 
    samples2_3 %>%
      dplyr::mutate(
        type = "samples2_3"
      )
  )

# Assign sample ids and link every row to its origin, incubation-start and
# parent sample.
samples2_2 <- 
  samples2_2 %>%
  dplyr::mutate(
    # ids continue after the collection samples
    id_sample = seq_len(nrow(.)) + max(samples2_1$id_sample),
    # origin: the collection sample with the same experimental design, taxon
    # and sample type (the left join keeps the row order of the stacked table)
    id_sample_origin = 
      dplyr::left_join(
        samples2_2 %>% dplyr::select(experimental_design, taxon_rank_value, sample_type),
        samples2_1 %>% dplyr::select(experimental_design, taxon_rank_value, sample_type, id_sample),
        by = c("experimental_design", "taxon_rank_value", "sample_type")
      ) %>%
      dplyr::pull(id_sample),
    # incubation start: the row of the same taxon, sample type and design
    # with incubation_duration 0; `map_int()` requires exactly one match
    id_sample_incubation_start = 
      purrr::map_int(seq_len(nrow(.)), function(i) {
        index <- paste0(taxon_rank_value, "_", sample_type) == paste0(taxon_rank_value, "_", sample_type)[[i]] & experimental_design == experimental_design[[i]] & incubation_duration == 0.0
        id_sample[index]
      }),
    # parent: the row of the same group with the longest shorter incubation
    # duration; rows without such a predecessor point to their origin sample
    id_sample_parent = 
      purrr::map_int(seq_len(nrow(.)), function(i) {
       index <- paste0(taxon_rank_value, "_", sample_type) == paste0(taxon_rank_value, "_", sample_type)[[i]] & experimental_design == experimental_design[[i]] & incubation_duration < incubation_duration[[i]]
        if(! any(index)) {
          id_sample_origin[[i]]
        } else {
          target_incubation_duration <- max(incubation_duration[index])
          index <- index & incubation_duration == target_incubation_duration
          id_sample[index]
        }
      })
  ) %>%
  # after the matching above, all incubated rows are labelled as litter
  dplyr::mutate(
    sample_type = "litter"
  )


## experiment 3

# mass remaining
# Experiment 3: one row per digitised data point of Fig. 3, describing the
# sample retrieved after the incubation. Later columns reference earlier
# ones, so the statement order matters.
samples3_3 <- 
  readRDS(paste0(dir_source, "/raw/caldat/Limpens.2003-Fig3"))$processed_data %>%
  dplyr::mutate(
    id_dataset = datasets$id_dataset[[1]],
    incubation_environment = "peat",
    is_incubated = TRUE,
    id_experiment = 3,
    mass_absolute = NA_real_,
    # NOTE(review): presumably `mean` is percent mass loss, converted here to
    # the remaining mass fraction -- confirm against the figure
    mass_relative_mass = (100 - mean)/100,
    mass_relative_mass_error = error/100,
    mass_relative_mass_error_type = "se",
    mass_relative_mass_sample_size = 10L,
    mesh_size_absolute = 74/1000,
    incubation_duration = 365, #---note: not explicitly written in the text, but assumed
    sampling_date =
      as.Date("1999-12-01") + lubridate::ddays(incubation_duration),
    sampling_year = lubridate::year(sampling_date),
    sampling_month = lubridate::month(sampling_date),
    sampling_day = NA_real_,
    # species decoded from the digitised group id
    taxon_rank_value = 
      dplyr::case_when(
        stringr::str_detect(id, "whatSphagnumpapillosum") ~ "Sphagnum papillosum",
        stringr::str_detect(id, "whatSphagnumfallax") ~ "Sphagnum fallax",
        stringr::str_detect(id, "whatEriophorumangustifolium") ~ "Eriophorum angustifolium"
      ),
    taxon_rank_name = "species",
    sample_treatment = "transplanted",
    # only "stemy" Sphagnum rows are vegetation; everything else is litter
    sample_type = 
      dplyr::case_when(
        stringr::str_detect(id, "stemy") & stringr::str_detect(id, "Sphagnum") ~ "vegetation",
        TRUE ~ "litter",
      ),
    # NOTE(review): the descriptions name Sphagnum magellanicum and Eriophorum
    # vaginatum, while `taxon_rank_value` uses S. papillosum / S. fallax and
    # Eriophorum angustifolium -- confirm
    sample_type2 = 
      dplyr::case_when(
        stringr::str_detect(id, "stemy") & stringr::str_detect(id, "Sphagnum") ~ "Still red Sphagnum magellanicum stem (1-3 cm from capitulum)",
        stringr::str_detect(id, "stemo") & stringr::str_detect(id, "Sphagnum") ~ "Brown Sphagnum magellanicum stem (5-7 cm from capitulum)",
        TRUE ~ "Eriophorum vaginatum leaf litter"
      ),
    taxon_organ = 
      dplyr::case_when(
        stringr::str_detect(id, "Sphagnum") ~ "stems",
        stringr::str_detect(id, "Eriophorum") ~ "leaves"
      ),
    sample_depth_upper = 10, #---note: assumed, the text says "about 10 cm above the highest water table"
    sample_depth_lower = 15,
    # NOTE(review): other text in this file spells the site "Bargerveen";
    # check whether "Bargveen" is a typo
    site_label = "Bargveen",
    # site the material was collected from, decoded from the group id
    origin_site_name = 
      dplyr::case_when(
        stringr::str_detect(id, "siteoriginNL") ~ "Reigersplas",
        stringr::str_detect(id, "siteoriginIRE") ~ "Clara Bog"
      ),
    sample_microhabitat = "low hummock",
    # degree/minute strings converted to decimal degrees
    sampling_longitude =
      "7°03'E" %>%
      sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
      as.numeric(),
    sampling_latitude =
      "52°42'N" %>%
      sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
      as.numeric(),
    # key "<experiment>_<site index>_<origin site index>"
    experimental_design =
      paste0(
        3, "_", #---note: id experiment
        as.numeric(as.factor(site_label)), "_",
        as.numeric(as.factor(origin_site_name))
      ),
    comments_samples = "Coordinates are approximate coordinates for this incubation site, but not exact sampling locations."
  ) %>%
  # drop the raw digitisation columns that were converted above
  dplyr::select(-mean, -error, -n, -variable)

# initial mass
# Experiment 3: the same samples at the start of the incubation
# (incubation_duration = 0): the full mass remains and there is no error.
# `incubation_duration` is in days, so it is added with `ddays()` as in the
# mass-remaining block.
samples3_2 <-
  samples3_3 %>%
  dplyr::mutate(
    mass_relative_mass = 1,
    mass_relative_mass_error = 0,
    incubation_duration = 0,
    sampling_date =
      as.Date("1999-12-01") + lubridate::ddays(incubation_duration),
    sampling_year = lubridate::year(sampling_date),
    sampling_month = lubridate::month(sampling_date),
    sampling_day = NA_real_
  )

# sample collection
# Experiment 3: one row per unique combination of origin site, taxon and
# sample type, describing the material as originally collected (before
# incubation). These rows are their own origin and parent.
# The latitudes are assigned so that each site gets the same coordinates as
# in experiments 1 and 2 (Reigersplas: 52°50'N, Clara Bog: 53°19'N); they
# were swapped between the two sites before.
samples3_1 <- 
  samples3_2 %>%
  dplyr::filter(! duplicated(paste0(origin_site_name, "_", taxon_rank_value, "_", sample_type))) %>%
  dplyr::mutate(
    # ids continue after the last sample id of experiment 2
    id_sample = seq_len(nrow(.)) + max(samples2_2$id_sample),
    id_sample_origin = id_sample,
    id_sample_parent = id_sample,
    id_sample_incubation_start = NA_integer_,
    sample_treatment = "control",
    is_incubated = FALSE,
    incubation_environment = NA_character_,
    site_label = origin_site_name,
    # NOTE(review): Clara Bog is decoded from `siteoriginIRE` ids; an eastern
    # longitude looks suspicious for that site -- confirm E vs. W
    sampling_longitude =
      dplyr::case_when(
        origin_site_name == "Reigersplas" ~ "6°27'E",
        origin_site_name == "Clara Bog" ~ "7°36'E"
      ) %>%
      sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
      as.numeric(),
    sampling_latitude =
      dplyr::case_when(
        origin_site_name == "Reigersplas" ~ "52°50'N",
        origin_site_name == "Clara Bog" ~ "53°19'N"
      ) %>%
      sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
      as.numeric(),
    sample_microhabitat = NA_character_,
    sampling_date = as.Date("1999-08-15"),
    sampling_year = lubridate::year(sampling_date),
    sampling_month = lubridate::month(sampling_date),
    sampling_day = NA_real_,
    # depth range by stem age class for Sphagnum; unknown for other material
    sample_depth_upper =
      dplyr::case_when(
        stringr::str_detect(id, "stemy") & stringr::str_detect(id, "Sphagnum") ~ 1,
        stringr::str_detect(id, "stemo") & stringr::str_detect(id, "Sphagnum") ~ 5,
        TRUE ~ NA_real_
      ),
    sample_depth_lower =
      dplyr::case_when(
        stringr::str_detect(id, "stemy") & stringr::str_detect(id, "Sphagnum") ~ 3,
        stringr::str_detect(id, "stemo") & stringr::str_detect(id, "Sphagnum") ~ 7,
        TRUE ~ NA_real_
      ),
    experimental_design = NA_character_,
    comments_samples = "Coordinates are approximate coordinates for this incubation site, but not exact sampling locations."
  )

# add missing ids
# Stack the initial-mass rows and the mass-remaining rows of experiment 3;
# `type` records which table each row came from.
samples3_2 <- 
  dplyr::bind_rows(
    samples3_2 %>%
      dplyr::mutate(
        type = "samples3_2"
      ), 
    samples3_3 %>%
      dplyr::mutate(
        type = "samples3_3"
      )
  )

# Assign sample ids and link every row to its origin, incubation-start and
# parent sample.
samples3_2 <- 
  samples3_2 %>%
  dplyr::mutate(
    # ids continue after the collection samples
    id_sample = seq_len(nrow(.)) + max(samples3_1$id_sample),
    # origin: the collection sample with the same origin site, taxon and
    # sample type (the left join keeps the row order of the stacked table)
    id_sample_origin = 
      dplyr::left_join(
        samples3_2 %>% dplyr::select(origin_site_name, taxon_rank_value, sample_type),
        samples3_1 %>% dplyr::select(origin_site_name, taxon_rank_value, sample_type, id_sample),
        by = c("origin_site_name", "taxon_rank_value", "sample_type")
      ) %>%
      dplyr::pull(id_sample),
    # incubation start: the row of the same taxon, sample type and design
    # with incubation_duration 0; `map_int()` requires exactly one match
    id_sample_incubation_start = 
      purrr::map_int(seq_len(nrow(.)), function(i) {
        index <- paste0(taxon_rank_value, "_", sample_type) == paste0(taxon_rank_value, "_", sample_type)[[i]] & experimental_design == experimental_design[[i]] & incubation_duration == 0.0
        id_sample[index]
      }),
    # parent: the row of the same group with the longest shorter incubation
    # duration; rows without such a predecessor point to their origin sample
    id_sample_parent = 
      purrr::map_int(seq_len(nrow(.)), function(i) {
       index <- paste0(taxon_rank_value, "_", sample_type) == paste0(taxon_rank_value, "_", sample_type)[[i]] & experimental_design == experimental_design[[i]] & incubation_duration < incubation_duration[[i]]
        if(! any(index)) {
          id_sample_origin[[i]]
        } else {
          target_incubation_duration <- max(incubation_duration[index])
          index <- index & incubation_duration == target_incubation_duration
          id_sample[index]
        }
      })
  ) %>%
  # after the matching above, all incubated rows are labelled as litter
  dplyr::mutate(
    sample_type = "litter"
  )



## experiment 4

# mass remaining
# Experiment 4: one row per digitised data point of Fig. 4, describing the
# sample retrieved after the incubation. Later columns reference earlier
# ones, so the statement order matters.
# Fixes relative to the previous version:
# - the comment column is named `comments_samples`, as in every other sample
#   table of this script, so the text ends up in the shared column when the
#   tables are row-bound;
# - the origin latitudes are assigned so that each site gets the same
#   coordinates as in experiments 1 and 2 (Reigersplas: 52°50'N,
#   Clara Bog: 53°19'N).
samples4_3 <- 
  readRDS(paste0(dir_source, "/raw/caldat/Limpens.2003-Fig4"))$processed_data %>%
  dplyr::mutate(
    id_dataset = datasets$id_dataset[[1]],
    incubation_environment = "peat",
    is_incubated = TRUE,
    id_experiment = 4,
    mass_absolute = NA_real_,
    # NOTE(review): presumably `mean` is percent mass loss, converted here to
    # the remaining mass fraction -- confirm against the figure
    mass_relative_mass = (100 - mean)/100,
    mass_relative_mass_error = error/100,
    mass_relative_mass_error_type = "se",
    mass_relative_mass_sample_size = 5L,
    mesh_size_absolute = 74/1000,
    # incubation duration in days (it is added to the start date via `ddays()`)
    incubation_duration = 365,
    sampling_date =
      as.Date(as.Date("1999-12-01") + lubridate::ddays(incubation_duration)),
    sampling_year = lubridate::year(sampling_date),
    sampling_month = lubridate::month(sampling_date),
    sampling_day = NA_real_,
    # species decoded from the digitised group id
    taxon_rank_value = 
      dplyr::case_when(
        stringr::str_detect(id, "whatSphagnumpapillosum") ~ "Sphagnum papillosum",
        stringr::str_detect(id, "whatSphagnumfallax") ~ "Sphagnum fallax",
        stringr::str_detect(id, "whatEriophorumangustifolium") ~ "Eriophorum angustifolium"
      ),
    taxon_rank_name = "species",
    # mesocosm nitrogen treatment decoded from the digitised group id
    sample_treatment = 
      dplyr::case_when(
        stringr::str_detect(id, "treatmentN0") ~ "mesocosm_control",
        stringr::str_detect(id, "treatmentN40") ~ "mesocosm_nitrogen_fertilized_40kghayr",
        stringr::str_detect(id, "treatmentN80") ~ "mesocosm_nitrogen_fertilized_80kghayr"
      ),
    # only "stemy" Sphagnum rows are vegetation; everything else is litter
    sample_type = 
      dplyr::case_when(
        stringr::str_detect(id, "stemy") & stringr::str_detect(id, "Sphagnum") ~ "vegetation",
        TRUE ~ "litter",
      ),
    # NOTE(review): the descriptions name Sphagnum magellanicum and Eriophorum
    # vaginatum, while `taxon_rank_value` uses S. papillosum / S. fallax and
    # Eriophorum angustifolium -- confirm
    sample_type2 = 
      dplyr::case_when(
        stringr::str_detect(id, "stemy") & stringr::str_detect(id, "Sphagnum") ~ "Still red Sphagnum magellanicum stem (1-3 cm from capitulum)",
        stringr::str_detect(id, "stemo") & stringr::str_detect(id, "Sphagnum") ~ "Brown Sphagnum magellanicum stem (5-7 cm from capitulum)",
        TRUE ~ "Eriophorum vaginatum leaf litter"
      ),
    taxon_organ = 
      dplyr::case_when(
        stringr::str_detect(id, "Sphagnum") ~ "stems",
        stringr::str_detect(id, "Eriophorum") ~ "leaves"
      ),
    sample_depth_upper = 7,
    sample_depth_lower = 10,
    # depth range of the collected material by stem age class (Sphagnum only)
    origin_sample_depth_upper =
      dplyr::case_when(
        stringr::str_detect(id, "stemy") & stringr::str_detect(id, "Sphagnum") ~ 1,
        stringr::str_detect(id, "stemo") & stringr::str_detect(id, "Sphagnum") ~ 5,
        TRUE ~ NA_real_
      ),
    origin_sample_depth_lower =
      dplyr::case_when(
        stringr::str_detect(id, "stemy") & stringr::str_detect(id, "Sphagnum") ~ 3,
        stringr::str_detect(id, "stemo") & stringr::str_detect(id, "Sphagnum") ~ 7,
        TRUE ~ NA_real_
      ),
    site_name = NA, #---note: greenhouse mesocosm
    # site the material was collected from, decoded from the group id
    origin_site_name = 
      dplyr::case_when(
        stringr::str_detect(id, "siteoriginNL") ~ "Reigersplas",
        stringr::str_detect(id, "siteoriginIRE") ~ "Clara Bog"
      ),
    # NOTE(review): the other sample tables use `sample_microhabitat` for
    # this information -- confirm whether `microtopography` is intended
    microtopography = NA,
    # degree/minute strings converted to decimal degrees
    sampling_longitude =
      "7°03'E" %>%
      sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
      as.numeric(),
    sampling_latitude =
      "52°42'N" %>%
      sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
      as.numeric(),
    # NOTE(review): Clara Bog is decoded from `siteoriginIRE` ids; an eastern
    # longitude looks suspicious for that site -- confirm E vs. W
    origin_sampling_longitude =
      dplyr::case_when(
        origin_site_name == "Reigersplas" ~ "6°27'E",
        origin_site_name == "Clara Bog" ~ "7°36'E"
      ) %>%
      sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
      as.numeric(),
    origin_sampling_latitude =
      dplyr::case_when(
        origin_site_name == "Reigersplas" ~ "52°50'N",
        origin_site_name == "Clara Bog" ~ "53°19'N"
      ) %>%
      sp::char2dms(chd = "°", chm = "'", chs = "''") %>%
      as.numeric(),
    # key "<experiment>_<treatment index>_<origin site index>"
    experimental_design =
      paste0(
        4, "_", #---note: id experiment
        as.numeric(as.factor(sample_treatment)), "_",
        as.numeric(as.factor(origin_site_name))
      ),
    water_table_depth = 8,
    comments_samples = "Samples were incubated in field mesocosms in the field under a roof at an unknown location. Mesocosms either received no additional treatment, or N fertilization of 40 or 80 kg N ha$^{-1}$ yr$^{-1}$."
  ) %>%
  # drop the raw digitisation columns that were converted above
  dplyr::select(-mean, -error, -n, -variable)

# initial mass
# Experiment 4: the same samples at the start of the incubation
# (incubation_duration = 0): the full mass remains and there is no error.
# `incubation_duration` is in days, so it is added with `ddays()` as in the
# mass-remaining block.
samples4_2 <-
  samples4_3 %>%
  dplyr::mutate(
    mass_relative_mass = 1,
    mass_relative_mass_error = 0,
    incubation_duration = 0,
    sampling_date =
      as.Date("1999-12-01") + lubridate::ddays(incubation_duration),
    sampling_year = lubridate::year(sampling_date),
    sampling_month = lubridate::month(sampling_date),
    sampling_day = NA_real_
  )

# sample collection
# Experiment 4 reuses the collection samples of experiment 3: keep those rows
# whose origin site / taxon / sample type key also occurs in experiment 4.
samples4_1 <-
  dplyr::filter(
    samples3_1,
    paste0(origin_site_name, "_", taxon_rank_value, "_", sample_type) %in%
      with(
        samples4_3,
        paste0(origin_site_name, "_", taxon_rank_value, "_", sample_type)
      )
  )

# add missing ids
# Stack the initial-mass rows and the mass-remaining rows of experiment 4;
# `type` records which table each row came from.
samples4_2 <- 
  dplyr::bind_rows(
    samples4_2 %>%
      dplyr::mutate(
        type = "samples4_2"
      ), 
    samples4_3 %>%
      dplyr::mutate(
        type = "samples4_3"
      )
  )

# Assign sample ids and link every row to its origin, incubation-start and
# parent sample.
samples4_2 <- 
  samples4_2 %>%
  dplyr::mutate(
    # ids continue after the last sample id of experiment 3
    id_sample = seq_len(nrow(.)) + max(samples3_2$id_sample),
    # origin: the collection sample (a subset of the experiment 3 collection
    # samples) with the same origin site, taxon and sample type
    id_sample_origin = 
      dplyr::left_join(
        samples4_2 %>% dplyr::select(origin_site_name, taxon_rank_value, sample_type),
        samples4_1 %>% dplyr::select(origin_site_name, taxon_rank_value, sample_type, id_sample),
        by = c("origin_site_name", "taxon_rank_value", "sample_type")
      ) %>%
      dplyr::pull(id_sample),
    # incubation start: the row of the same taxon, sample type and design
    # with incubation_duration 0; `map_int()` requires exactly one match
    id_sample_incubation_start = 
      purrr::map_int(seq_len(nrow(.)), function(i) {
        index <- paste0(taxon_rank_value, "_", sample_type) == paste0(taxon_rank_value, "_", sample_type)[[i]] & experimental_design == experimental_design[[i]] & incubation_duration == 0.0
        id_sample[index]
      }),
    # parent: the row of the same group with the longest shorter incubation
    # duration; rows without such a predecessor point to their origin sample
    id_sample_parent = 
      purrr::map_int(seq_len(nrow(.)), function(i) {
       index <- paste0(taxon_rank_value, "_", sample_type) == paste0(taxon_rank_value, "_", sample_type)[[i]] & experimental_design == experimental_design[[i]] & incubation_duration < incubation_duration[[i]]
        if(! any(index)) {
          id_sample_origin[[i]]
        } else {
          target_incubation_duration <- max(incubation_duration[index])
          index <- index & incubation_duration == target_incubation_duration
          id_sample[index]
        }
      })
  ) %>%
  # after the matching above, all incubated rows are labelled as litter
  dplyr::mutate(
    sample_type = "litter"
  )

  
## combine
# Row-bind the template table and the sample tables of all experiments. The
# collection tables get their `type` label here; the stacked tables already
# carry one.
samples <-
  list(
    db_template_tables$samples,
    dplyr::mutate(samples1_1, type = "samples1_1"),
    samples1_2,
    dplyr::mutate(samples2_1, type = "samples2_1"),
    samples2_2,
    dplyr::mutate(samples3_1, type = "samples3_1"),
    samples3_2,
    samples4_2 #---note: no samples4_1 here because this is identical to a subset of samples 3_1
  ) %>%
  dplyr::bind_rows() %>%
  # the site name is taken from the site label for every row
  dplyr::mutate(site_name = site_label)

2.4 samples_to_samples

# Parent -> child links between samples. Rows whose id occurs as an origin id
# are dropped; the transition is "translocate" for initial-mass rows and
# "wait" for mass-remaining rows.
samples_to_samples <-
  samples %>%
  dplyr::filter(! id_sample %in% id_sample_origin) %>%
  dplyr::mutate(
    transition_description =
      dplyr::case_when(
        type %in% paste0("samples", 1:4, "_2") ~ "translocate",
        type %in% paste0("samples", 1:4, "_3") ~ "wait",
        TRUE ~ NA_character_
      )
  ) %>%
  dplyr::select(
    id_sample_parent,
    id_sample_child = id_sample,
    transition_description
  )

2.5 data

# experiment 1
# Long-format measurement table: one row per sample and attribute
# (absolute mass, relative mass, mesh size).
d1_2 <- 
  samples1_2 %>%
  tidyr::pivot_longer(
    cols = dplyr::all_of(c("mass_absolute", "mass_relative_mass", "mesh_size_absolute")),
    names_to = "attribute_name",
    values_to = "value"
  ) %>%
  dplyr::mutate(
    # ids continue after the last measurement id found in the database
    id_measurement = seq_len(nrow(.)) + id_last$id_measurement,
    # for relative mass: the absolute-mass measurement of the same sample;
    # NA for all other attributes
    id_measurement_numerator =
      purrr::map_int(seq_len(nrow(.)), function(i) {
        switch(
          attribute_name[[i]],
          "mass_relative_mass" = {
            id_measurement[id_sample == id_sample[[i]] & attribute_name == "mass_absolute"]
          },
          NA_integer_
        )
      }),
    # for relative mass: the absolute-mass measurement of the sample at the
    # start of the incubation; NA for all other attributes
    id_measurement_denominator =
      purrr::map_int(seq_len(nrow(.)), function(i) {
        switch(
          attribute_name[[i]],
          "mass_relative_mass" = {
            id_measurement[id_sample == id_sample_incubation_start[[i]] & attribute_name == "mass_absolute"]
          },
          NA_integer_
        )
      }),
    # mesh size is a single value, everything else is a mean
    value_type = 
      dplyr::case_when(
        attribute_name == "mesh_size_absolute" ~ "point", 
        TRUE ~ "mean"
      )
  )

# Lookup tables (sample id x attribute) for experiment 1. The suffix of the
# `<attribute>_sample_size`, `<attribute>_error` and `<attribute>_error_type`
# columns is stripped so that the rows can be matched by attribute name.

# sample sizes
d1_2_sample_size <-
  tidyr::pivot_longer(
    samples1_2,
    cols = dplyr::ends_with("_sample_size"),
    names_to = "attribute_name",
    values_to = "sample_size"
  ) %>%
  dplyr::mutate(
    attribute_name = stringr::str_remove(attribute_name, pattern = "_sample_size$")
  ) %>%
  dplyr::select(id_sample, attribute_name, sample_size)

# errors
d1_2_error <-
  tidyr::pivot_longer(
    samples1_2,
    cols = dplyr::ends_with("_error"),
    names_to = "attribute_name",
    values_to = "error"
  ) %>%
  dplyr::mutate(
    attribute_name = stringr::str_remove(attribute_name, pattern = "_error$")
  ) %>%
  dplyr::select(id_sample, attribute_name, error)

# error types
d1_2_error_type <-
  tidyr::pivot_longer(
    samples1_2,
    cols = dplyr::ends_with("_error_type"),
    names_to = "attribute_name",
    values_to = "error_type"
  ) %>%
  dplyr::mutate(
    attribute_name = stringr::str_remove(attribute_name, pattern = "_error_type$")
  ) %>%
  dplyr::select(id_sample, attribute_name, error_type)

# Attach error, error type and sample size to the measurements of
# experiment 1. Each left join is keyed by sample id and attribute name and
# uses the table as it was before this statement; attributes without a
# matching lookup row get NA.
d1_2 <- 
  d1_2 %>%
  dplyr::mutate(
    error =
      dplyr::left_join(d1_2, d1_2_error, by = c("id_sample", "attribute_name")) %>%
      dplyr::pull(error),
    error_type =
      dplyr::left_join(d1_2, d1_2_error_type, by = c("id_sample", "attribute_name")) %>%
      dplyr::pull(error_type),
    sample_size =
      dplyr::left_join(d1_2, d1_2_sample_size, by = c("id_sample", "attribute_name")) %>%
      dplyr::pull(sample_size)
  )


# experiment 2
# Long-format measurement table: one row per sample and attribute
# (absolute mass, relative mass, mesh size).
d2_2 <- 
  samples2_2 %>%
  tidyr::pivot_longer(
    cols = dplyr::all_of(c("mass_absolute", "mass_relative_mass", "mesh_size_absolute")),
    names_to = "attribute_name",
    values_to = "value"
  ) %>%
  dplyr::mutate(
    # ids continue after the last measurement id of experiment 1
    id_measurement = seq_len(nrow(.)) + max(d1_2$id_measurement),
    # for relative mass: the absolute-mass measurement of the same sample;
    # NA for all other attributes
    id_measurement_numerator =
      purrr::map_int(seq_len(nrow(.)), function(i) {
        switch(
          attribute_name[[i]],
          "mass_relative_mass" = {
            id_measurement[id_sample == id_sample[[i]] & attribute_name == "mass_absolute"]
          },
          NA_integer_
        )
      }),
    # for relative mass: the absolute-mass measurement of the sample at the
    # start of the incubation; NA for all other attributes
    id_measurement_denominator =
      purrr::map_int(seq_len(nrow(.)), function(i) {
        switch(
          attribute_name[[i]],
          "mass_relative_mass" = {
            id_measurement[id_sample == id_sample_incubation_start[[i]] & attribute_name == "mass_absolute"]
          },
          NA_integer_
        )
      }),
    # mesh size is a single value, everything else is a mean
    value_type = 
      dplyr::case_when(
        attribute_name == "mesh_size_absolute" ~ "point", 
        TRUE ~ "mean"
      )
  )

# Lookup tables (sample id x attribute) for experiment 2. The suffix of the
# `<attribute>_sample_size`, `<attribute>_error` and `<attribute>_error_type`
# columns is stripped so that the rows can be matched by attribute name.

# sample sizes
d2_2_sample_size <-
  tidyr::pivot_longer(
    samples2_2,
    cols = dplyr::ends_with("_sample_size"),
    names_to = "attribute_name",
    values_to = "sample_size"
  ) %>%
  dplyr::mutate(
    attribute_name = stringr::str_remove(attribute_name, pattern = "_sample_size$")
  ) %>%
  dplyr::select(id_sample, attribute_name, sample_size)

# errors
d2_2_error <-
  tidyr::pivot_longer(
    samples2_2,
    cols = dplyr::ends_with("_error"),
    names_to = "attribute_name",
    values_to = "error"
  ) %>%
  dplyr::mutate(
    attribute_name = stringr::str_remove(attribute_name, pattern = "_error$")
  ) %>%
  dplyr::select(id_sample, attribute_name, error)

# error types
d2_2_error_type <-
  tidyr::pivot_longer(
    samples2_2,
    cols = dplyr::ends_with("_error_type"),
    names_to = "attribute_name",
    values_to = "error_type"
  ) %>%
  dplyr::mutate(
    attribute_name = stringr::str_remove(attribute_name, pattern = "_error_type$")
  ) %>%
  dplyr::select(id_sample, attribute_name, error_type)

# Attach error, error type and sample size to the measurements of
# experiment 2. Each left join is keyed by sample id and attribute name and
# uses the table as it was before this statement; attributes without a
# matching lookup row get NA.
d2_2 <- 
  d2_2 %>%
  dplyr::mutate(
    error =
      dplyr::left_join(d2_2, d2_2_error, by = c("id_sample", "attribute_name")) %>%
      dplyr::pull(error),
    error_type =
      dplyr::left_join(d2_2, d2_2_error_type, by = c("id_sample", "attribute_name")) %>%
      dplyr::pull(error_type),
    sample_size =
      dplyr::left_join(d2_2, d2_2_sample_size, by = c("id_sample", "attribute_name")) %>%
      dplyr::pull(sample_size)
  )



# experiment 3
# Reshape the experiment 3 samples to long format (one row per input row and
# measured attribute) and number the measurements consecutively after the
# largest `id_measurement` of experiment 2.
d3_2 <-
  tidyr::pivot_longer(
    samples3_2,
    cols = dplyr::all_of(c("mass_absolute", "mass_relative_mass", "mesh_size_absolute")),
    names_to = "attribute_name",
    values_to = "value"
  ) %>%
  dplyr::mutate(
    id_measurement = seq_len(nrow(.)) + max(d2_2$id_measurement),
    # For relative masses: the `mass_absolute` measurement of the same sample.
    # `map_int()` requires exactly one match per relative mass.
    id_measurement_numerator =
      purrr::map_int(seq_along(attribute_name), function(row) {
        if (identical(attribute_name[[row]], "mass_relative_mass")) {
          id_measurement[id_sample == id_sample[[row]] & attribute_name == "mass_absolute"]
        } else {
          NA_integer_
        }
      }),
    # For relative masses: the `mass_absolute` measurement of the sample
    # referenced by `id_sample_incubation_start`.
    id_measurement_denominator =
      purrr::map_int(seq_along(attribute_name), function(row) {
        if (identical(attribute_name[[row]], "mass_relative_mass")) {
          id_measurement[id_sample == id_sample_incubation_start[[row]] & attribute_name == "mass_absolute"]
        } else {
          NA_integer_
        }
      }),
    # Mesh size is stored as a point value, everything else as a mean.
    value_type =
      dplyr::if_else(attribute_name %in% "mesh_size_absolute", "point", "mean")
  )

# Sample sizes of experiment 3 in long format: every `*_sample_size` column of
# `samples3_2` becomes one row per input row, keyed by `id_sample` and the
# attribute name (column name without the `_sample_size` suffix).
d3_2_sample_size <-
  tidyr::pivot_longer(
    samples3_2,
    cols = dplyr::ends_with("_sample_size"),
    names_to = "attribute_name",
    values_to = "sample_size"
  ) %>%
  dplyr::select(id_sample, attribute_name, sample_size) %>%
  dplyr::mutate(
    attribute_name = stringr::str_remove(attribute_name, pattern = "_sample_size$")
  )

# Errors of experiment 3 in long format: every `*_error` column of
# `samples3_2` becomes one row per input row, keyed by `id_sample` and the
# attribute name (column name without the `_error` suffix).
d3_2_error <-
  tidyr::pivot_longer(
    samples3_2,
    cols = dplyr::ends_with("_error"),
    names_to = "attribute_name",
    values_to = "error"
  ) %>%
  dplyr::select(id_sample, attribute_name, error) %>%
  dplyr::mutate(
    attribute_name = stringr::str_remove(attribute_name, pattern = "_error$")
  )

# Error types of experiment 3 in long format: every `*_error_type` column of
# `samples3_2` becomes one row per input row, keyed by `id_sample` and the
# attribute name (column name without the `_error_type` suffix).
d3_2_error_type <-
  tidyr::pivot_longer(
    samples3_2,
    cols = dplyr::ends_with("_error_type"),
    names_to = "attribute_name",
    values_to = "error_type"
  ) %>%
  dplyr::select(id_sample, attribute_name, error_type) %>%
  dplyr::mutate(
    attribute_name = stringr::str_remove(attribute_name, pattern = "_error_type$")
  )

# Attach error, error type, and sample size to the measurements of
# experiment 3. Each lookup table is left-joined to the current `d3_2` by
# `id_sample` and `attribute_name`, and only the looked-up column is pulled out
# of the join result; rows without a match get `NA`.
d3_2 <-
  dplyr::mutate(
    d3_2,
    error =
      dplyr::pull(
        dplyr::left_join(d3_2, d3_2_error, by = c("id_sample", "attribute_name")),
        error
      ),
    error_type =
      dplyr::pull(
        dplyr::left_join(d3_2, d3_2_error_type, by = c("id_sample", "attribute_name")),
        error_type
      ),
    sample_size =
      dplyr::pull(
        dplyr::left_join(d3_2, d3_2_sample_size, by = c("id_sample", "attribute_name")),
        sample_size
      )
  )



# experiment 4
# Reshape the experiment 4 samples to long format (one row per input row and
# measured attribute) and number the measurements consecutively after the
# largest `id_measurement` of experiment 3.
d4_2 <-
  tidyr::pivot_longer(
    samples4_2,
    cols = dplyr::all_of(c("mass_absolute", "mass_relative_mass", "mesh_size_absolute", "water_table_depth")),
    names_to = "attribute_name",
    values_to = "value"
  ) %>%
  dplyr::mutate(
    id_measurement = seq_len(nrow(.)) + max(d3_2$id_measurement),
    # For relative masses: the `mass_absolute` measurement of the same sample.
    # `map_int()` requires exactly one match per relative mass.
    id_measurement_numerator =
      purrr::map_int(seq_along(attribute_name), function(row) {
        if (identical(attribute_name[[row]], "mass_relative_mass")) {
          id_measurement[id_sample == id_sample[[row]] & attribute_name == "mass_absolute"]
        } else {
          NA_integer_
        }
      }),
    # For relative masses: the `mass_absolute` measurement of the sample
    # referenced by `id_sample_incubation_start`.
    id_measurement_denominator =
      purrr::map_int(seq_along(attribute_name), function(row) {
        if (identical(attribute_name[[row]], "mass_relative_mass")) {
          id_measurement[id_sample == id_sample_incubation_start[[row]] & attribute_name == "mass_absolute"]
        } else {
          NA_integer_
        }
      }),
    # Mesh size is stored as a point value, everything else as a mean.
    value_type =
      dplyr::if_else(attribute_name %in% "mesh_size_absolute", "point", "mean")
  )

# Sample sizes of experiment 4 in long format: every `*_sample_size` column of
# `samples4_2` becomes one row per input row, keyed by `id_sample` and the
# attribute name (column name without the `_sample_size` suffix).
d4_2_sample_size <-
  tidyr::pivot_longer(
    samples4_2,
    cols = dplyr::ends_with("_sample_size"),
    names_to = "attribute_name",
    values_to = "sample_size"
  ) %>%
  dplyr::select(id_sample, attribute_name, sample_size) %>%
  dplyr::mutate(
    attribute_name = stringr::str_remove(attribute_name, pattern = "_sample_size$")
  )

# Errors of experiment 4 in long format: every `*_error` column of
# `samples4_2` becomes one row per input row, keyed by `id_sample` and the
# attribute name (column name without the `_error` suffix).
d4_2_error <-
  tidyr::pivot_longer(
    samples4_2,
    cols = dplyr::ends_with("_error"),
    names_to = "attribute_name",
    values_to = "error"
  ) %>%
  dplyr::select(id_sample, attribute_name, error) %>%
  dplyr::mutate(
    attribute_name = stringr::str_remove(attribute_name, pattern = "_error$")
  )

# Error types of experiment 4 in long format: every `*_error_type` column of
# `samples4_2` becomes one row per input row, keyed by `id_sample` and the
# attribute name (column name without the `_error_type` suffix).
d4_2_error_type <-
  tidyr::pivot_longer(
    samples4_2,
    cols = dplyr::ends_with("_error_type"),
    names_to = "attribute_name",
    values_to = "error_type"
  ) %>%
  dplyr::select(id_sample, attribute_name, error_type) %>%
  dplyr::mutate(
    attribute_name = stringr::str_remove(attribute_name, pattern = "_error_type$")
  )

# Attach error, error type, and sample size to the measurements of
# experiment 4. Each lookup table is left-joined to the current `d4_2` by
# `id_sample` and `attribute_name`, and only the looked-up column is pulled out
# of the join result; rows without a match get `NA`.
d4_2 <-
  dplyr::mutate(
    d4_2,
    error =
      dplyr::pull(
        dplyr::left_join(d4_2, d4_2_error, by = c("id_sample", "attribute_name")),
        error
      ),
    error_type =
      dplyr::pull(
        dplyr::left_join(d4_2, d4_2_error_type, by = c("id_sample", "attribute_name")),
        error_type
      ),
    sample_size =
      dplyr::pull(
        dplyr::left_join(d4_2, d4_2_sample_size, by = c("id_sample", "attribute_name")),
        sample_size
      )
  )



# combine
# Stack the measurements of all four experiments below the template table and
# keep only the template's columns, in the template's column order.
d <-
  dplyr::select(
    dplyr::bind_rows(list(db_template_tables$data, d1_2, d2_2, d3_2, d4_2)),
    dplyr::all_of(colnames(db_template_tables$data))
  )

2.6 experimental_design_format

# Row for the `experimental_design_format` table: which csv file documents the
# experimental design of this dataset and what its columns mean.
experimental_design_format <-
  tibble::tibble(
    id_dataset = datasets$id_dataset,
    file = paste0(id_last$id_dataset + 1L, "/experimental_design_format.csv"),
    experimental_design_description = "`id_experiment`: An identifier for the experiment (see the article for details). `sample_treatment`: A description of the treatment (see the article for details). `site_name`: Name of the site. `origin_site_name`: Name of the site where the samples were collected from (see the article for details)."
  )

# csv file to export
# One row per distinct, non-missing `experimental_design` value (the first
# occurrence in `samples` is kept), restricted to the design columns.
experimental_design_format2 <-
  dplyr::select(
    dplyr::filter(
      samples,
      !is.na(experimental_design),
      !duplicated(experimental_design)
    ),
    experimental_design, id_experiment, sample_treatment, site_name, origin_site_name
  )

# export
utils::write.csv(
  x = experimental_design_format2,
  file = paste0(dir_target, "/experimental_design_format.csv"),
  row.names = FALSE
)

3 Export to database

# list all tables
# Each new table is reduced to (and ordered by) the columns of the database
# table with the same name. `all_of()` makes a missing column an error.
dm_insert_in <-
  list(
    datasets = datasets,
    samples = samples,
    data = d,
    samples_to_samples = samples_to_samples,
    citations_to_datasets = citations_to_datasets,
    experimental_design_format = experimental_design_format
  ) %>%
  purrr::imap(function(new_rows, table_name) {
    dplyr::select(
      new_rows,
      dplyr::all_of(colnames(dm_dpeatdecomposition[[table_name]]))
    )
  })

# check whether all column names are present in table attributes
# The known attribute names are fetched once, while the connection is still
# open. NOTE(review): if the tables in `dm_dpeatdecomposition` are lazy
# database tables, querying them after `dbDisconnect()` fails, so the offending
# names are always computed before the connection is closed.
known_attribute_names <-
  dm_dpeatdecomposition %>%
  dm::pull_tbl(attributes) %>%
  dplyr::pull(attribute_name)

all_column_names <- 
  purrr::map(dm_insert_in, colnames) %>%
  unlist() %>%
  unique()

# Column names of the tables to insert that are not registered as attributes.
unknown_column_names <- setdiff(all_column_names, known_attribute_names)

if(length(unknown_column_names) > 0) {
  RMariaDB::dbDisconnect(con)
  stop(paste0("New `attribute_name`s discovered: ", paste(unknown_column_names, collapse = ", ")))
}

all_data_attributes <- unique(dm_insert_in$data$attribute_name)

# Values of `data$attribute_name` that are not registered as attributes.
unknown_data_attributes <- setdiff(all_data_attributes, known_attribute_names)

if(length(unknown_data_attributes) > 0) {
  RMariaDB::dbDisconnect(con)
  stop(paste0("New `attribute_name`s discovered: ", paste(unknown_data_attributes, collapse = ", ")))
}


# filter empty tables
# Only tables with at least one row are turned into a dm object; the keys are
# then added via `dp_dm_add_keys()` using the database dm as reference.
dm_insert_in_check <-
  dp_dm_add_keys(
    dm::as_dm(purrr::keep(dm_insert_in, function(new_rows) nrow(new_rows) > 0)),
    dm_dpeatdecomposition
  )

# copy into the database
# The tables are appended in the order of `dm_insert_in`. The connection is
# closed in `finally`, so it is released when an insert fails as well (the
# error itself is still raised), consistent with the attribute checks, which
# disconnect before stopping.
invisible(
  tryCatch(
    {
      for(i in seq_along(dm_insert_in)) {
        RMariaDB::dbAppendTable(con, name = names(dm_insert_in)[[i]], value = dm_insert_in[[i]])
      }
    },
    finally = RMariaDB::dbDisconnect(con)
  )
)

4 Notes