# packages
library(metaDigitise)
library(magrittr)
library(tibble)
library(dplyr)
library(lubridate)
library(dpeatdecomposition)
library(dm)
library(RMariaDB)
library(ggplot2)

1 Preparations

Connect to database

# Open a MariaDB connection to the `dpeatdecomposition` database; further
# connection settings are taken from the option file "~/my.cnf".
con <-
  RMariaDB::MariaDB() %>%
  RMariaDB::dbConnect(
    dbname = "dpeatdecomposition",
    default.file = "~/my.cnf"
  )

# Represent the database as a dm object (requested with `learn_keys = TRUE`).
dm_dpeatdecomposition <-
  con %>%
  dpeatdecomposition::dp_get_dm(learn_keys = TRUE)

Get the most recent IDs

# For each of the three tables, take the last element of its ID column as the
# most recent ID. An empty table yields a zero-length vector, which is
# replaced by 0L so that `id_last$... + 1L` gives the first new ID.
# NOTE(review): `tail(1)` equals the largest ID only if the rows come back
# ordered by ID -- confirm.
id_last <-
  purrr::map(
    list(
      id_dataset =
        tail(dplyr::pull(dm::pull_tbl(dm_dpeatdecomposition, datasets), id_dataset), 1),
      id_sample =
        tail(dplyr::pull(dm::pull_tbl(dm_dpeatdecomposition, samples), id_sample), 1),
      id_measurement =
        tail(dplyr::pull(dm::pull_tbl(dm_dpeatdecomposition, data), id_measurement), 1)
    ),
    function(ids) {
      if (length(ids) == 0) 0L else ids
    }
  )

Create directories

# Folder names: raw data of this dataset (d49) and the folder for derived
# files, which is named after the ID the new dataset will receive.
dir_name <- "d49"
dir_source <- "../raw_data/data/d49"
dir_target <- paste0("../derived_data/", id_last$id_dataset + 1L)

# Create the target folder if needed. `recursive = TRUE` also creates a
# missing "../derived_data" parent; without it dir.create() only warns and
# returns FALSE, and the CSV export into `dir_target` fails later on.
if (!dir.exists(dir_target)) {
  dir.create(dir_target, recursive = TRUE)
}

2 Data wrangling

2.1 dataset

# Single-row table for the new dataset: its ID is the most recent dataset ID
# in the database plus one.
datasets <- tibble::tibble(id_dataset = id_last$id_dataset + 1L)

2.2 citations_to_datasets

# Append the link between the new dataset and its citation key to the
# (template) `citations_to_datasets` table.
citations_to_datasets <-
  db_template_tables$citations_to_datasets %>%
  dplyr::bind_rows(
    tibble::tibble(
      id_dataset = datasets$id_dataset,
      id_citation = "Manninen.2016"
    )
  )

2.3 samples

# mass remaining (2013 experiment)
# Litterbag masses at the start of the incubation. Sheet 1 of the workbook is
# read once per species: each species occupies its own range of rows, and the
# selected columns are renamed to treatment, plot, plot label and one column
# per replicate litterbag before being reshaped to one row per litterbag.
samples2 <-
  list(
    # Sphagnum capillifolium
    list(
      taxon_rank_value = "Sphagnum capillifolium",
      columns = 1:11,
      rows = 1:26,
      n_replicates = 8L
    ),
    # Sphagnum papillosum
    list(
      taxon_rank_value = "Sphagnum papillosum",
      columns = 1:13,
      rows = 33:40,
      n_replicates = 10L
    )
  ) %>%
  purrr::map(function(block) {
    readxl::read_xlsx(paste0(dir_source, "/raw/Moss bags_September 2013-2014.xlsx"), sheet = 1L, skip = 4L) %>%
      dplyr::select(dplyr::all_of(block$columns)) %>%
      dplyr::slice(block$rows) %>%
      setNames(
        nm = c(
          "origin_sample_treatment", "id_plot", "plot_label",
          paste0("id_replicate_", seq_len(block$n_replicates))
        )
      ) %>%
      dplyr::mutate(
        dplyr::across(dplyr::starts_with("id_replicate"), as.numeric)
      ) %>%
      # the treatment label is only given in the first row of each treatment
      tidyr::fill(origin_sample_treatment, .direction = "down") %>%
      tidyr::pivot_longer(
        cols = dplyr::starts_with("id_replicate_"),
        names_to = "id_replicate",
        values_to = "mass_absolute"
      ) %>%
      dplyr::mutate(taxon_rank_value = block$taxon_rank_value)
  }) %>%
  dplyr::bind_rows() %>%
  # clean up the values read from the sheet
  dplyr::mutate(
    origin_sample_treatment =
      dplyr::if_else(origin_sample_treatment == "NO364", "NO3 64", origin_sample_treatment),
    # at the start of the incubation all of the mass is still present
    mass_relative_mass =
      dplyr::if_else(is.na(mass_absolute), NA_real_, 1.0),
    id_replicate =
      as.numeric(stringr::str_remove(id_replicate, "^id_replicate_"))
  ) %>%
  # sample description
  dplyr::mutate(
    sample_treatment = "control", #---note: samples incubate in control treatment
    sample_type = "litter",
    taxon_rank_name = "species",
    taxon_organ = "shoots",
    mesh_size_absolute = 0.71
  ) %>%
  # sampling time (incubation duration of 0 years, expressed in days)
  dplyr::mutate(
    sampling_date = as.Date("2013-09-09"),
    incubation_duration =
      lubridate::time_length(lubridate::dyears(0), unit = "days"),
    sampling_year = lubridate::year(sampling_date),
    sampling_month = lubridate::month(sampling_date),
    sampling_day = lubridate::day(sampling_date)
  ) %>%
  # incubation setting and site
  dplyr::mutate(
    sample_microhabitat = "hummock",
    sample_depth_upper = 3,
    sample_depth_lower = 8,
    id_dataset = datasets$id_dataset[[1]],
    is_incubated = TRUE,
    incubation_environment = "peat",
    site_name = "Whim bog",
    sampling_longitude = -3.271656,
    sampling_latitude = 55.765479
  ) %>%
  # code for the experimental design: factor level numbers of site, origin
  # treatment, incubation treatment, plot and replicate, joined by "_"
  dplyr::mutate(
    experimental_design =
      paste(
        as.numeric(as.factor(site_name)),
        as.numeric(as.factor(origin_sample_treatment)),
        as.numeric(as.factor(sample_treatment)),
        as.numeric(as.factor(id_plot)),
        as.numeric(as.factor(id_replicate)),
        sep = "_"
      ),
    comments_samples = "Coordinates are only the approximate location of the site, but not the exact sampling point."
  )
## New names:
## New names:
## • `treatment` -> `treatment...1`
## • `plot` -> `plot...2`
## • `letter code` -> `letter code...3`
## • `1` -> `1...4`
## • `2` -> `2...5`
## • `3` -> `3...6`
## • `4` -> `4...7`
## • `5` -> `5...8`
## • `6` -> `6...9`
## • `7` -> `7...10`
## • `8` -> `8...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `treatment` -> `treatment...16`
## • `plot` -> `plot...17`
## • `letter code` -> `letter code...18`
## • `1` -> `1...19`
## • `2` -> `2...20`
## • `3` -> `3...21`
## • `4` -> `4...22`
## • `5` -> `5...23`
## • `6` -> `6...24`
## • `7` -> `7...25`
## • `8` -> `8...26`
## • `` -> `...27`
## • `` -> `...28`
## • `` -> `...29`
## • `treatment` -> `treatment...30`
## • `plot` -> `plot...31`
## • `letter code` -> `letter code...32`
## • `1` -> `1...33`
## • `2` -> `2...34`
## • `3` -> `3...35`
## • `4` -> `4...36`
## • `5` -> `5...37`
## • `6` -> `6...38`
## • `7` -> `7...39`
## • `8` -> `8...40`
## • `` -> `...41`
## • `` -> `...42`
# Litterbag masses after one year of incubation. Same layout as for the
# initial masses, but read from the second group of columns of sheet 1.
samples3 <-
  list(
    # Sphagnum capillifolium
    list(
      taxon_rank_value = "Sphagnum capillifolium",
      columns = 16:26,
      rows = 1:26,
      n_replicates = 8L
    ),
    # Sphagnum papillosum
    list(
      taxon_rank_value = "Sphagnum papillosum",
      columns = 16:28,
      rows = 33:40,
      n_replicates = 10L
    )
  ) %>%
  purrr::map(function(block) {
    readxl::read_xlsx(paste0(dir_source, "/raw/Moss bags_September 2013-2014.xlsx"), sheet = 1L, skip = 4L) %>%
      dplyr::select(dplyr::all_of(block$columns)) %>%
      dplyr::slice(block$rows) %>%
      setNames(
        nm = c(
          "origin_sample_treatment", "id_plot", "plot_label",
          paste0("id_replicate_", seq_len(block$n_replicates))
        )
      ) %>%
      dplyr::mutate(
        dplyr::across(dplyr::starts_with("id_replicate"), as.numeric)
      ) %>%
      # the treatment label is only given in the first row of each treatment
      tidyr::fill(origin_sample_treatment, .direction = "down") %>%
      tidyr::pivot_longer(
        cols = dplyr::starts_with("id_replicate_"),
        names_to = "id_replicate",
        values_to = "mass_absolute"
      ) %>%
      dplyr::mutate(taxon_rank_value = block$taxon_rank_value)
  }) %>%
  dplyr::bind_rows() %>%
  # clean up the values read from the sheet
  dplyr::mutate(
    origin_sample_treatment =
      dplyr::if_else(origin_sample_treatment == "NO364", "NO3 64", origin_sample_treatment),
    # fraction of the initial mass; this divides row by row and therefore
    # relies on `samples2` (initial masses) having the same row order
    mass_relative_mass = mass_absolute / samples2$mass_absolute,
    id_replicate =
      as.numeric(stringr::str_remove(id_replicate, "^id_replicate_"))
  ) %>%
  # sample description
  dplyr::mutate(
    sample_treatment = "control", #---note: samples incubate in control treatment
    sample_type = "litter",
    taxon_rank_name = "species",
    taxon_organ = "shoots",
    mesh_size_absolute = 0.71
  ) %>%
  # sampling time (incubation duration of 1 year, expressed in days)
  dplyr::mutate(
    sampling_date = as.Date("2014-09-22"),
    incubation_duration =
      lubridate::time_length(lubridate::dyears(1), unit = "days"),
    sampling_year = lubridate::year(sampling_date),
    sampling_month = lubridate::month(sampling_date),
    sampling_day = lubridate::day(sampling_date)
  ) %>%
  # incubation setting and site
  dplyr::mutate(
    sample_microhabitat = "hummock",
    sample_depth_upper = 3,
    sample_depth_lower = 8,
    id_dataset = datasets$id_dataset[[1]],
    is_incubated = TRUE,
    incubation_environment = "peat",
    site_name = "Whim bog",
    sampling_longitude = -3.271656,
    sampling_latitude = 55.765479
  ) %>%
  # code for the experimental design: factor level numbers of site, origin
  # treatment, incubation treatment, plot and replicate, joined by "_"
  dplyr::mutate(
    experimental_design =
      paste(
        as.numeric(as.factor(site_name)),
        as.numeric(as.factor(origin_sample_treatment)),
        as.numeric(as.factor(sample_treatment)),
        as.numeric(as.factor(id_plot)),
        as.numeric(as.factor(id_replicate)),
        sep = "_"
      ),
    comments_samples = "Coordinates are only the approximate location of the site, but not the exact sampling point."
  )
## New names:
## New names:
## • `treatment` -> `treatment...1`
## • `plot` -> `plot...2`
## • `letter code` -> `letter code...3`
## • `1` -> `1...4`
## • `2` -> `2...5`
## • `3` -> `3...6`
## • `4` -> `4...7`
## • `5` -> `5...8`
## • `6` -> `6...9`
## • `7` -> `7...10`
## • `8` -> `8...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `treatment` -> `treatment...16`
## • `plot` -> `plot...17`
## • `letter code` -> `letter code...18`
## • `1` -> `1...19`
## • `2` -> `2...20`
## • `3` -> `3...21`
## • `4` -> `4...22`
## • `5` -> `5...23`
## • `6` -> `6...24`
## • `7` -> `7...25`
## • `8` -> `8...26`
## • `` -> `...27`
## • `` -> `...28`
## • `` -> `...29`
## • `treatment` -> `treatment...30`
## • `plot` -> `plot...31`
## • `letter code` -> `letter code...32`
## • `1` -> `1...33`
## • `2` -> `2...34`
## • `3` -> `3...35`
## • `4` -> `4...36`
## • `5` -> `5...37`
## • `6` -> `6...38`
## • `7` -> `7...39`
## • `8` -> `8...40`
## • `` -> `...41`
## • `` -> `...42`
# Visual check: mean (+/- one standard deviation) of the relative mass
# remaining after one year, per species and treatment under which the litter
# was grown. Treatments whose label ends in "PK" are left out.
samples3 %>%
  dplyr::filter(!stringr::str_detect(origin_sample_treatment, "PK$")) %>%
  dplyr::group_by(taxon_rank_value, origin_sample_treatment) %>%
  dplyr::summarize(
    # `TRUE` instead of `T`: `T` is an ordinary variable that can be reassigned
    y = mean(mass_relative_mass, na.rm = TRUE),
    y_sd = sd(mass_relative_mass, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  dplyr::mutate(
    # fix the order of the treatments on the x axis
    origin_sample_treatment =
      factor(
        origin_sample_treatment,
        levels = c("control", "NO3 16", "NO3 64", "NH4 16", "NH4 64")
      )
  ) %>%
  ggplot(aes(y = y, x = origin_sample_treatment, fill = taxon_rank_value)) +
  # geom_col() draws bars from the values as given (same as stat = "identity")
  geom_col(position = position_dodge(0.9)) +
  geom_errorbar(aes(ymin = y - y_sd, ymax = y + y_sd), position = position_dodge(0.9)) +
  coord_cartesian(ylim = c(0.5, 1))

# litter collection
# One record per species and growth treatment for the collected litter. It is
# derived from the first initial-litterbag row of each species x treatment
# combination; the plot and replicate information is blanked out and the
# growth treatment becomes the sample's own treatment.
samples1 <-
  samples2 %>%
  dplyr::filter(!duplicated(paste(taxon_rank_value, origin_sample_treatment, sep = "_"))) %>% #---note: all replicate litterbags prepared were prepared from homogenized litter collected from all plots
  # move the growth treatment to `sample_treatment` and drop litterbag details
  dplyr::mutate(
    sample_treatment = origin_sample_treatment,
    origin_sample_treatment = NA_character_,
    id_plot = NA_character_,
    plot_label = NA_character_,
    id_replicate = NA_integer_,
    sampling_day = NA_real_,
    sampling_month = 8
  ) %>%
  # new sample IDs continue after the most recent ID in the database; these
  # samples are their own origin and parent
  dplyr::mutate(
    id_sample = dplyr::row_number() + id_last$id_sample,
    id_sample_origin = id_sample,
    id_sample_parent = id_sample,
    id_sample_incubation_start = NA_integer_
  ) %>%
  # not incubated; recompute the design code from the modified columns
  dplyr::mutate(
    is_incubated = FALSE,
    incubation_environment = NA_character_,
    experimental_design =
      paste(
        as.numeric(as.factor(site_name)),
        as.numeric(as.factor(origin_sample_treatment)),
        as.numeric(as.factor(sample_treatment)),
        as.numeric(as.factor(id_plot)),
        as.numeric(as.factor(id_replicate)),
        sep = "_"
      ),
    comments_samples = paste0(comments_samples, " samples are mixtures of individual samples collected from all replicate plots of each treatment.")
  )



# add missing ids
# First stack the initial (`samples2`) and one-year (`samples3`) litterbag
# records into one table; `type` records which table a row came from.
samples2 <-
  dplyr::bind_rows(
    dplyr::mutate(samples2, type = "samples2"),
    dplyr::mutate(samples3, type = "samples3")
  )


# Assign sample IDs and the sample genealogy (origin, incubation start,
# parent) to all litterbag records stacked in `samples2`.
samples2 <- 
  samples2 %>%
  dplyr::mutate(
    # consecutive IDs that continue after the largest ID used in `samples1`
    id_sample = seq_len(nrow(.)) + max(samples1$id_sample),
    # ID of the `samples1` record with the same sample type, taxon and
    # treatment: the litterbag's `origin_sample_treatment` is matched against
    # `sample_treatment` of `samples1`. `samples2` inside this call is the
    # table as it exists before this assignment (it is not a column name).
    # The pulled vector must have one element per row, i.e. each litterbag
    # must match at most one `samples1` record.
    id_sample_origin = 
      dplyr::left_join(
        samples2 %>% 
          dplyr::select(sample_type, taxon_rank_value, origin_sample_treatment) %>%
          dplyr::rename(
            sample_treatment = "origin_sample_treatment"
          ),
        samples1 %>% dplyr::select(sample_type, taxon_rank_value, sample_treatment, id_sample),
        by = c("sample_type", "taxon_rank_value", "sample_treatment")
      ) %>%
      dplyr::pull(id_sample),
    # ID of the record with the same sample key (type, taxon, site, organ,
    # upper depth) and the same experimental design code whose incubation
    # duration is 0. `map_int()` requires exactly one such record per row.
    id_sample_incubation_start = 
      purrr::map_int(seq_len(nrow(.)), function(i) {
        index <- paste0(sample_type, "_", taxon_rank_value, "_", site_name, "_", taxon_organ, "_", sample_depth_upper) == paste0(sample_type, "_", taxon_rank_value, "_", site_name, "_", taxon_organ, "_", sample_depth_upper)[[i]] & experimental_design == experimental_design[[i]] & incubation_duration == 0.0
        id_sample[index]
      }),
    # ID of the preceding record of the same litterbag: among the records with
    # the same sample key and design code and a shorter incubation duration,
    # the one with the longest duration. Records without such a predecessor
    # (incubation start) get their origin sample as parent.
    id_sample_parent = 
      purrr::map_int(seq_len(nrow(.)), function(i) {
        index <- paste0(sample_type, "_", taxon_rank_value, "_", site_name, "_", taxon_organ, "_", sample_depth_upper) == paste0(sample_type, "_", taxon_rank_value, "_", site_name, "_", taxon_organ, "_", sample_depth_upper)[[i]] & experimental_design == experimental_design[[i]] & incubation_duration < incubation_duration[[i]]
        if(! any(index)) {
          id_sample_origin[[i]]
        } else {
          target_incubation_duration <- max(incubation_duration[index])
          index <- index & incubation_duration == target_incubation_duration
          id_sample[index]
        }
      })
  )


# litter chemistry: There are two types of data.
# (1) I have verified that `S.papillosum_CN litterbags_2013_copy.xlsx`
#     contains measurements for individual litterbags for S. capillifolium for
#     the 2013 experiment from @Manninen.2016.
# (2) For S. papillosum, measurements were made only on litter pooled from
#     individual plots of the same treatments (but the text does not say
#     whether equal masses were used).
# However, if I understand the text correctly, the litter used for the
# litterbag experiment was also pooled in this way, and C and N measurements
# were made on aliquots of this pooled litter. Hence, I bind all values to the
# initial mass values.
#
# S. capillifolium: plot ID, C and N content from sheet 2 (first 36 rows).
d49_litter_chemistry_1 <-
  readxl::read_excel(paste0(dir_source, "/raw/S.papillosum_CN litterbags_2013_copy.xlsx"), sheet = 2L) %>%
  dplyr::slice(1:36) %>%
  dplyr::select(c(2, 8, 10)) %>%
  setNames(c("id_plot", "C_relative_mass", "N_relative_mass")) %>%
  dplyr::mutate(
    id_plot = as.character(id_plot),
    # Convert to mass fractions. The columns are named `*_relative_mass`; the
    # previous selector `ends_with("_mass_relative")` matched no column, so
    # the division was silently skipped.
    # NOTE(review): assumes the workbook reports C and N in percent -- confirm.
    dplyr::across(
      dplyr::ends_with("_relative_mass"),
      function(.x) .x / 100
    ),
    # values are attached to the samples at the start of the incubation
    incubation_duration = 0,
    taxon_rank_value = "Sphagnum capillifolium"
  )


# S. papillosum: treatment label, C and N content from sheet 4 of the moss
# bag workbook (columns 1, 2 and 4 after skipping 18 rows).
d49_litter_chemistry_2 <-
  readxl::read_excel(paste0(dir_source, "/raw/Moss bags_September 2013-2014.xlsx"), sheet = 4L, skip = 18L) %>%
  dplyr::select(c(1, 2, 4)) %>%
  setNames(c("sample_treatment", "C_relative_mass", "N_relative_mass")) %>%
  dplyr::mutate(
    # expand the abbreviated treatment labels; labels that match none of the
    # three cases become NA
    # NOTE(review): ammonium labels are expanded to "NH3 ..." here, whereas the
    # mass data in this file use "NH4 ..." -- confirm which spelling is meant.
    sample_treatment =
      dplyr::case_when(
        stringr::str_detect(sample_treatment, "^NO") ~ stringr::str_replace(sample_treatment, "^NO", "NO3 "),
        stringr::str_detect(sample_treatment, "^NH") ~ stringr::str_replace(sample_treatment, "^NH", "NH3 "),
        sample_treatment == "con" ~ "control"
      ),
    # separate a trailing "PK" from the dose by a space
    sample_treatment =
      dplyr::case_when(
        stringr::str_detect(sample_treatment, "PK$") ~ stringr::str_replace(sample_treatment, "PK$", " PK"),
        TRUE ~ sample_treatment
      ),
    # Convert to mass fractions. The columns are named `*_relative_mass`; the
    # previous selector `ends_with("_mass_relative")` matched no column, so
    # the division was silently skipped.
    # NOTE(review): assumes the workbook reports C and N in percent -- confirm.
    dplyr::across(
      dplyr::ends_with("_relative_mass"),
      function(.x) .x / 100
    ),
    # values are attached to the samples at the start of the incubation
    incubation_duration = 0,
    taxon_rank_value = "Sphagnum papillosum"
  )
## New names:
## • `` -> `...1`
## • `mass remaining %` -> `mass remaining %...3`
## • `mass remaining %` -> `mass remaining %...5`
## • `mass remaining %` -> `mass remaining %...7`
# Attach the litter chemistry to the litterbag records, separately for each
# species because the join keys differ, then restore the order by sample ID.
# NOTE(review): for S. papillosum the key `sample_treatment` is "control" for
# every litterbag record built in this file, so only chemistry rows labelled
# "control" can match -- confirm whether `origin_sample_treatment` was meant.
samples2 <-
  dplyr::bind_rows(
    # S. capillifolium: values per plot
    samples2 %>%
      dplyr::filter(taxon_rank_value == "Sphagnum capillifolium") %>%
      dplyr::left_join(
        d49_litter_chemistry_1,
        by = c("taxon_rank_value", "incubation_duration", "id_plot")
      ),
    # S. papillosum: values per treatment
    samples2 %>%
      dplyr::filter(taxon_rank_value == "Sphagnum papillosum") %>%
      dplyr::left_join(
        d49_litter_chemistry_2,
        by = c("taxon_rank_value", "incubation_duration", "sample_treatment")
      )
  ) %>%
  dplyr::arrange(id_sample) %>%
  # absolute C and N masses are not available; the columns are added as
  # placeholders filled with NA
  dplyr::mutate(C_absolute = NA_real_, N_absolute = NA_real_)
  


## combine
# Template table, collected litter (`samples1`, tagged with its `type` and
# without `sampling_date`) and all litterbag records in one table.
samples <-
  dplyr::bind_rows(
    db_template_tables$samples,
    dplyr::select(dplyr::mutate(samples1, type = "samples1"), -sampling_date),
    samples2
  )

2.4 samples_to_samples

# Parent -> child links for every sample whose ID does not occur as an origin
# sample ID. The transition is "translocate" for initial litterbag records and
# "wait" for the one-year records.
samples_to_samples <-
  samples %>%
  dplyr::filter(!id_sample %in% id_sample_origin) %>%
  dplyr::mutate(
    transition_description =
      dplyr::case_when(
        type %in% "samples2" ~ "translocate",
        type %in% "samples3" ~ "wait",
        TRUE ~ NA_character_
      )
  ) %>%
  dplyr::select(
    id_sample_parent,
    id_sample_child = "id_sample",
    transition_description
  )

2.5 data

# Measurements in long format: one row per litterbag record and attribute
# (the seven columns listed in `cols`), with new measurement IDs and, for
# relative values, the IDs of the measurements they are computed from.
d2 <- 
  samples2 %>%
  tidyr::pivot_longer(
    cols = dplyr::all_of(c("mass_absolute", "mass_relative_mass", "mesh_size_absolute", "C_relative_mass", "N_relative_mass", "C_absolute", "N_absolute")),
    names_to = "attribute_name",
    values_to = "value"
  ) %>%
  dplyr::mutate(
    # consecutive IDs continuing after the most recent measurement ID
    id_measurement = seq_len(nrow(.)) + id_last$id_measurement,
    # Numerator of relative values:
    # - `mass_relative_mass`: `mass_absolute` of the same sample
    # - other `*_relative_mass` attributes: the matching `*_absolute`
    #   attribute of the same sample (the name is derived by replacing
    #   "_relative_mass", optionally followed by "2", with "_absolute")
    # - all other attributes: NA
    # `map_int()` requires each lookup to return exactly one ID.
    id_measurement_numerator =
      purrr::map_int(seq_len(nrow(.)), function(i) {
        if(attribute_name[[i]] == "mass_relative_mass") {
          id_measurement[id_sample == id_sample[[i]] & attribute_name == "mass_absolute"]
        } else if (stringr::str_detect(attribute_name[[i]], pattern = "_relative_mass$")) {
          id_measurement[id_sample == id_sample[[i]] & attribute_name == stringr::str_replace(attribute_name[[i]], "_relative_mass2?", "_absolute")]
        } else {
          NA_integer_
        }
      }),
    # Denominator of relative values:
    # - `mass_relative_mass`: `mass_absolute` of the sample at the start of
    #   the incubation (`id_sample_incubation_start`)
    # - other `*_relative_mass` attributes: `mass_absolute` of the same sample
    # - all other attributes: NA
    id_measurement_denominator =
      purrr::map_int(seq_len(nrow(.)), function(i) {
        if(attribute_name[[i]] == "mass_relative_mass") {
          id_measurement[id_sample == id_sample_incubation_start[[i]] & attribute_name == "mass_absolute"]
        } else if (stringr::str_detect(attribute_name[[i]], pattern = "_relative_mass$")) {
          id_measurement[id_sample == id_sample[[i]] & attribute_name == "mass_absolute"]
        } else {
          NA_integer_
        }
      }),
    # every value is a single measurement without an error estimate
    value_type = "point",
    sample_size = 1L,
    error = NA_real_,
    error_type = "sd"
  )


# combine
# Append the measurements to the template table and keep only the template's
# columns, in the template's order.
d <-
  db_template_tables$data %>%
  dplyr::bind_rows(d2) %>%
  dplyr::select(dplyr::all_of(colnames(db_template_tables$data)))

2.6 experimental_design_format

# Database entry describing the experimental design: the dataset ID, the
# relative path of the exported CSV file, and a text explaining the
# components of the design code.
experimental_design_format <-
  datasets %>%
  dplyr::select(id_dataset) %>%
  dplyr::mutate(
    file = paste0(id_last$id_dataset + 1L, "/experimental_design_format.csv"),
    experimental_design_description = "`site_name`: Name of the site. `sample_treatment`: Character value indicating the form (NO3 or NH3) and dose (kg ha$^{-1}$ yr$^{-1}$) of N fertilization and whether in addition P and K were added (see @Manninen.2016 for details) during sample growth (for non-incubated litter samples) or during incubation (the dataset contains only data from samples  incubated in control plots). `origin_sample_treatment` has the same format as `sample_treatment`, but defines for incubated samples in which plots the litter was grown. `id_plot`: Character representing the ID of te experimental plot (for *S. papillosum*, there is no plot label because all data and samples are pooled values from several plots, see @Manninen.2016). `id_replicate`: Integer value denoting replicate litter bags."
  )

# csv file to export
# Lookup table for the design codes: the first row of every distinct,
# non-missing `experimental_design` value together with its components.
experimental_design_format2 <-
  samples %>%
  dplyr::filter(
    !is.na(experimental_design),
    !duplicated(experimental_design)
  ) %>%
  dplyr::select(
    experimental_design,
    site_name,
    origin_sample_treatment,
    sample_treatment,
    id_plot,
    id_replicate
  )

# export
write.csv(
  experimental_design_format2,
  file = paste0(dir_target, "/experimental_design_format.csv"),
  row.names = FALSE
)

3 Export to database

# list all tables
# Each new table is reduced to the columns of the database table with the
# same name, in the database's column order; a column missing from the new
# table is an error (`all_of()`).
dm_insert_in <-
  list(
    datasets = datasets,
    samples = samples,
    data = d,
    samples_to_samples = samples_to_samples,
    citations_to_datasets = citations_to_datasets,
    experimental_design_format = experimental_design_format
  ) %>%
  purrr::imap(function(new_rows, table_name) {
    dplyr::select(
      new_rows,
      dplyr::all_of(colnames(dm_dpeatdecomposition[[table_name]]))
    )
  })

# check whether all column names are present in table attributes
# All attribute names the database knows; fetched once and used by both checks.
known_attribute_names <-
  dm_dpeatdecomposition %>%
  dm::pull_tbl(attributes) %>%
  dplyr::pull(attribute_name)

all_column_names <-
  purrr::map(dm_insert_in, colnames) %>%
  unlist() %>%
  unique()

# Column names of the tables to insert must be registered attributes. The
# connection is closed before the script is aborted.
unknown_column_names <- setdiff(all_column_names, known_attribute_names)
if (length(unknown_column_names) > 0) {
  RMariaDB::dbDisconnect(con)
  stop(paste0("New `attribute_name`s discovered: ", paste(unknown_column_names, collapse = ", ")))
}

all_data_attributes <- unique(dm_insert_in$data$attribute_name)

# The same holds for the attribute names stored as values in `data`.
# Disconnect first: code placed after stop() is never reached, so the
# previous order left the connection open on error.
unknown_data_attributes <- setdiff(all_data_attributes, known_attribute_names)
if (length(unknown_data_attributes) > 0) {
  RMariaDB::dbDisconnect(con)
  stop(paste0("New `attribute_name`s discovered: ", paste(unknown_data_attributes, collapse = ", ")))
}


# filter empty tables
# Build a dm from the non-empty new tables and add the keys of the database
# dm to it.
dm_insert_in_check <-
  dm_insert_in %>%
  purrr::keep(function(new_rows) nrow(new_rows) > 0) %>%
  dm::as_dm() %>%
  dp_dm_add_keys(dm_dpeatdecomposition)

# append the new rows to the database tables, in the order of `dm_insert_in`
for (i in seq_along(dm_insert_in)) {
  table_name <- names(dm_insert_in)[[i]]
  RMariaDB::dbAppendTable(con, name = table_name, value = dm_insert_in[[table_name]])
}

# close the connection
RMariaDB::dbDisconnect(con)

4 Notes