Import sleep diaries

Author

Johannes Zauner

Preface

This document imports the sleep diaries and shows descriptive statistics for the site.

Setup

library(tidyverse)
Warning: package 'ggplot2' was built under R version 4.5.2
Warning: package 'tidyr' was built under R version 4.5.2
Warning: package 'dplyr' was built under R version 4.5.2
library(LightLogR)
library(glue)
library(readxl)
library(gt)
library(gtsummary)

# Base URL of the folder that holds the shared helper scripts
remote <- 
  "https://raw.githubusercontent.com/MeLiDosProject/Data_Metadata_Conventions/main/scripts/"

# Build the complete URL of every helper script, then source them one by one
paste0(
  remote,
  c(
    "labeling",
    "radio_factors",
    "time_summaries",
    "prepare_codebook",
    "filefinder",
    "general_parameters",
    "coltype_checker",
    "tables"
  ),
  ".R"
) |> 
  walk(source)
Warning: package 'rlang' was built under R version 4.5.2

Collect the necessary external information

# Read the data dictionary of the morning sleep diaries and, in the same
# pipeline, strip the rich-text HTML tags from the field labels
codebook <- 
  read_csv(
    "https://raw.githubusercontent.com/MeLiDosProject/Data_Metadata_Conventions/main/codebook/MeLiDosMorningSleepDiaries_DataDictionary_2024-10-16.csv",
    show_col_types = FALSE
  ) |> 
  mutate(
    `Field Label` = str_remove_all(
      `Field Label`,
      "<div class=\"rich-text-field-label\">|<p>|<em>|</em>|</p>|</div>"
    )
  )

Collect files

The following files contain sleep-diary information.

# Folder containing the participant subfolders
path_part1 <- "../data/raw/individual"
# Location of the sleep diary inside each participant subfolder
path_part2 <- "/continuous/sleepdiary"
# Names of all entries directly below the participant folder
folders <- list.files(path_part1)
# One complete sleep-diary folder path per entry
paths <- glue("{path_part1}/{folders}{path_part2}")
# Full paths of every file found in those folders
files <- paths |> list.files(full.names = TRUE)

Import files

# Read all diary files (semicolon-separated, decimal comma) into one table,
# keep only rows that carry a repeat instance, and bring the columns into shape
sleepdiary <-
  files |> 
  read_csv2(show_col_types = FALSE) |> 
  drop_na(redcap_repeat_instance) |> 
  mutate(
    # time stamps are parsed as day-month-year hour-minute
    across(
      .cols = c(sleep, bedtime, offset, out_ofbed),
      .fns = function(timestamp) {
        parse_date_time(timestamp, orders = "dmyHM", tz = "UTC")
      }
    ),
    # prefix the record number with the site code
    record_id = paste0("MPI_S", record_id),
    # pull the number out of the text answers
    across(
      .cols = c(awakenings, awake_duration),
      .fns = parse_number
    ),
    # collapse the one-column-per-choice indicators into a single code
    sleepquality = 
      sleepquality___1 + 2*sleepquality___2 + 
      3*sleepquality___3 + 4*sleepquality___4 + 5*sleepquality___5,
    daytype2 = daytype2___1 + 2*daytype2___2
  )
ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.

Check column types

# Columns that must be present with a specific type after the import
should_POSIXct <- c("bedtime", "sleep", "offset", "out_ofbed")
should_numeric <- c(
  "sleepdelay", "awakenings", "awake_duration", "sleepquality", "daytype2"
)
should_character <- c("record_id", "comments")

# Stop with a descriptive message as soon as one expected column is missing
# or has a different type
stopifnot(
  "all of should_POSIXct need to be part of the dataset and of type POSIXct" =
    all(should_POSIXct %in% names(select(sleepdiary, where(is.POSIXct)))),
  "all of should_numeric need to be part of the dataset and of type numeric" =
    all(should_numeric %in% names(select(sleepdiary, where(is.numeric)))),
  "all of should_character need to be part of the dataset and of type character" =
    all(should_character %in% names(select(sleepdiary, where(is.character))))
)

Select relevant variables & nonempty rows

# Keep only the checked columns, ordered: character, date-time, numeric
sleepdiary <- select(
  sleepdiary,
  all_of(c(should_character, should_POSIXct, should_numeric))
)

Set sleep time zone

# Site whose time zone applies to the diary time stamps
site <- "MPI"
# Assign the site's time zone to every date-time column.
# NOTE(review): `tzs` is not defined in this file; presumably it is provided
# by one of the sourced helper scripts - confirm
sleepdiary <- mutate(
  sleepdiary,
  across(
    .cols = where(is.POSIXct),
    .fns = function(timestamp) force_tz(timestamp, tzone = tzs[[site]])
  )
)

Label variables

# Step 1: apply the codebook's radio-field choices to the diary columns
# (helper is defined outside this file; presumably it converts them to factors)
sleepdiary <- add_radio_factors(
  sleepdiary,
  codebook,
  var_col = `Variable / Field Name`,
  type_col = `Field Type`,
  levels_col = `Choices, Calculations, OR Slider Labels`
)
# Step 2: attach the codebook's field labels to the matching columns
sleepdiary <- add_col_labels(
  sleepdiary,
  codebook,
  var_col = `Variable / Field Name`,
  label_col = `Field Label`
)
Warning in add_col_labels(add_radio_factors(sleepdiary, codebook, var_col =
`Variable / Field Name`, : Labels provided for variables not in `data`: uuid,
startdate, enddate, scheduledate, status, supplementaldata, serializedresult

Calculate sleep onset and sleep duration

# Derive sleep onset and sleep duration.
# `offset` (final awakening) is renamed to `wake`, and the reported time of
# trying to go to sleep is renamed to `sleepprep`. Sleep onset (`sleep`) is
# `sleepprep` plus the reported time to fall asleep in minutes.
sleepdiary <- 
sleepdiary |> 
  rename(wake = offset, sleepprep = sleep) |> 
  mutate(sleep = sleepprep + dminutes(sleepdelay),
         # Request hours explicitly: `wake - sleep` chooses its unit (secs,
         # mins, hours, days) from the smallest difference in the data, so
         # dividing by 60 is only correct when that unit happens to be minutes
         sleep_duration = difftime(wake, sleep, units = "hours")
  )

# The difftime already carries units = "hours"; only the labels are added here
attr(sleepdiary$sleep, "label") <- "Sleep onset (calculated)"
attr(sleepdiary$sleep_duration, "label") <- "Sleep duration (calculated)"

Ensure nighttime awake duration is zero when there are no awakenings

# A night without awakenings has an awake duration of 0 minutes;
# all other values are left untouched
sleepdiary <- mutate(
  sleepdiary,
  awake_duration = replace_when(awake_duration, awakenings == 0 ~ 0)
)

Summarize diary results

# Descriptive summary of all diary variables except the free-text comments and
# the identifier. Clock-time variables use custom time statistics
# (`time_median`, `nighttime_*`, `daytime_*`), which are not defined in this
# file - presumably they come from the sourced helper scripts.
table_sleep <- 
  tbl_summary(
    data = sleepdiary,
    include = -c(comments, record_id),
    statistic = list(
      all_continuous() ~ "{median} ({p25}, {p75})",
      all_categorical() ~ "{n} ({p}%)",
      c(bedtime, sleep, sleepprep) ~ 
        "{time_median} ({nighttime_p25}, {nighttime_p75})",
      c(wake, out_ofbed) ~ 
        "{time_median} ({daytime_p25}, {daytime_p75})"
    ),
    # number of awakenings is summarised as a continuous variable
    type = awakenings ~ "continuous",
    missing_text = "missing"
  ) |> 
  add_n() |> 
  bold_labels() |> 
  modify_header(label = "**Morning sleep diary**") |> 
  modify_footnote_header(
    footnote = "Nighttime variables center on midnight, daytime variables on noon; median for time is based on circular time",
    columns = stat_0,
    replace = FALSE
  )

# Print the table
table_sleep
Morning sleep diary N N = 1821,2
What time did you get into bed? 182 23:46:00 (22:50:00, 00:56:30)
What time did you try to go to sleep? 182 00:14:30 (23:20:00, 01:13:45)
What time was your final awakening? i.,e. when did you wake up today? 181 07:45:00 (07:00:00, 09:00:00)
    missing
1
What time did you get out of bed for the day? 181 08:14:00 (07:10:00, 09:20:00)
    missing
1
How long did it take you to fall asleep? Please answer in minutes 181 10 (5, 20)
    missing
1
How many times did you wake up, not counting your final awakening? 181 1.00 (0.00, 2.00)
    missing
1
In total, how long did these awakenings last? Please answer in minutes 181 5 (0, 10)
    missing
1
How would you rate the quality of your sleep? 176
    Very poor
4 (2.3%)
    Poor
20 (11%)
    Fair
69 (39%)
    Good
71 (40%)
    Very good
12 (6.8%)
    missing
6
Today is... 181
    a free day
66 (36%)
    a work day
115 (64%)
    missing
1
Sleep onset (calculated) 181 00:40:00 (23:45:00, 01:45:00)
    missing
1
Sleep duration (calculated) 181 7.43 hours (6.43 hours, 8.08 hours)
    missing
1
1 time_median (nighttime_p25, nighttime_p75); time_median (daytime_p25, daytime_p75); Median (Q1, Q3); n (%)
2 Nighttime variables center on midnight, daytime variables on noon; median for time is based on circular time
# Create the output folder if it does not exist yet (same guard as used for
# the data export), then save the summary table as an image
if(!dir.exists("../output/tables/")) dir.create("../output/tables/", recursive = TRUE)
gtsave(table_sleep |> as_gt(), filename = "../output/tables/table_sleepdiary.png")
file:////var/folders/9p/326_k3kx43qbn_cyl1rqfhb00000gn/T//RtmpAgvY2Z/file1d756811940c.html screenshot completed

Translate comments

Comments written in the participants' native language are translated into English with AI and later checked by a site researcher.

# 
# library(ellmer)
# 
# #Providing the relevant codebook portions
# codebook_red <-
# codebook|>
#   pmap(~ paste(paste(names(codebook), c(...), sep = ": "), collapse = ", ")) |>
#   list_c() |>
#   paste0(collapse = "newline: ")
# 
# chat <- chat_openai(paste0("Copy content and make translations according to specific instructions. They represent answers to questionnaires in a scientific field study."))
# 
# #Providing the input
# data_red <-
# sleepdiary|>
#   select(record_id, comments)
# data_red <-
#   data_red |>
#   pmap(~ paste(paste(names(data_red), c(...), sep = ": "), collapse = ", "))
# 
# #creating an output structure
# type_data <- type_object(
#   record_id = type_string("use the record_id information"),
#   comments = type_string("copy the original information here", required = FALSE),
#   comments_english = type_string("translation of the original information into english", required = FALSE)
# )
# 
# data_llm <-
# parallel_chat_structured(
#   chat,
#   data_red,
#   type = type_data,
#   rpm = 500,
#   max_active = 100
# )
# 
# #Ensure that no NA is caught as string
# data_llm <-
#   data_llm |>
#   mutate(comments = case_when(comments == "NA" ~ NA, .default = comments),
#          comments_english = case_when(comments_english == "NA" ~ NA, .default = comments_english))
# 
# #check that input and output are identical
# stopifnot("Input must by identical to output check" =
#             all(data_llm$comments == sleepdiary$comments, na.rm = TRUE))
# stopifnot("Input must by identical to output check" =
#             all(data_llm$record_id == sleepdiary$record_id, na.rm = TRUE))
# 
# data_llm <-
#   data_llm |>
#   distinct(record_id, comments, .keep_all = TRUE)
# 
# path <- "../data/AI_translations/"
# if(!dir.exists(path)) dir.create(path, recursive = TRUE)
# write_csv(data_llm, "../data/AI_translations/sleepdiary.csv")
# Load the stored translations. All columns are read as text so that the join
# keys (`record_id`, `comments`) keep their type even if a column contains
# only missing values; fixing the types also silences the column-spec message
data_llm <- 
  read_csv(
    "../data/AI_translations/sleepdiary.csv",
    col_types = cols(.default = col_character())
  )
Rows: 92 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): record_id, comments, comments_english

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Add the translations to the diary. Each diary row may match at most one
# translation row; `relationship` makes the join fail instead of silently
# duplicating diary rows if the translation file contains duplicate keys
sleepdiary <- 
  sleepdiary |> 
  left_join(
    data_llm,
    by = c("record_id", "comments"),
    relationship = "many-to-one"
  )
attr(sleepdiary$comments_english, "label") <- "Comments (English translation)"

Sort by date

# Order the diary by participant and bedtime, then move the free-text
# comments directly behind the day-type column
sleepdiary <- arrange(sleepdiary, record_id, bedtime)
sleepdiary <- dplyr::relocate(sleepdiary, comments, .after = daytype2)

Export

# Rename the identifier column to `Id`
sleepdiary <- rename(sleepdiary, Id = record_id)
# Set the labels of the calculated columns
attr(sleepdiary$sleep, "label") <- "Sleep onset (calculated)"
attr(sleepdiary$sleep_duration, "label") <- "Sleep duration (calculated)"
# Create the export folder when it is missing, then store the dataset
path <- "../data/imported/continuous/"
if (!dir.exists(path)) {
  dir.create(path, recursive = TRUE)
}
save(sleepdiary, file = "../data/imported/continuous/sleepdiaries.RData")