Import sleep diaries

Author

Johannes Zauner

Preface

This document imports the sleep diaries and shows descriptive statistics for the site.

Setup

library(tidyverse)
Warning: package 'ggplot2' was built under R version 4.5.2
Warning: package 'tibble' was built under R version 4.5.2
Warning: package 'tidyr' was built under R version 4.5.2
Warning: package 'purrr' was built under R version 4.5.2
Warning: package 'dplyr' was built under R version 4.5.2
library(LightLogR)
library(glue)
Warning: package 'glue' was built under R version 4.5.2
library(readxl)
library(gt)
library(gtsummary)

# Base URL of the shared helper scripts (raw files under `main/scripts/`)
remote <- 
  "https://raw.githubusercontent.com/MeLiDosProject/Data_Metadata_Conventions/main/scripts/"

# Build the full URL of every helper script and source them in the listed order
paste0(
  remote,
  c(
    "labeling",
    "radio_factors",
    "time_summaries",
    "prepare_codebook",
    "filefinder",
    "general_parameters",
    "coltype_checker",
    "tables"
  ),
  ".R"
) |>
  walk(\(script_url) source(script_url))
Warning: package 'rlang' was built under R version 4.5.2

Preparation

# Load the data dictionary of the morning sleep diary
codebook <- prepare_codebook(
  "MeLiDosMorningSleepDiaries_DataDictionary_2024-10-16.csv"
)
# Locate the sleep diary export files
files <- filefinder("sleepdiary", continuous = TRUE)
# Read all files, drop rows without a repeat instance, and strip a trailing
# "_v2" from the column names
data <- read_csv(files, show_col_types = FALSE)
data <- drop_na(data, redcap_repeat_instance)
data <- rename_with(data, \(nm) str_replace(nm, "_v2$", ""))
# Compare the imported column types with the codebook and show the details
coltype_check <- coltype_checker(codebook, data)
gt(coltype_check$details)
col expected present actual type_ok issue expected_example
bedtime POSIXct TRUE POSIXct TRUE ok as.POSIXct(..., tz = 'UTC')
sleep POSIXct TRUE POSIXct TRUE ok as.POSIXct(..., tz = 'UTC')
offset POSIXct TRUE POSIXct TRUE ok as.POSIXct(..., tz = 'UTC')
out_ofbed POSIXct TRUE POSIXct TRUE ok as.POSIXct(..., tz = 'UTC')
sleepdelay numeric TRUE numeric TRUE ok as.numeric(...)
awakenings numeric TRUE numeric TRUE ok as.numeric(...)
awake_duration numeric TRUE numeric TRUE ok as.numeric(...)
sleepquality numeric TRUE numeric TRUE ok as.numeric(...)
daytype2 numeric TRUE numeric TRUE ok as.numeric(...)
status numeric TRUE numeric TRUE ok as.numeric(...)
scheduledate Date TRUE logical FALSE wrong_type as.Date(...)
record_id character TRUE character TRUE ok as.character(...)
comments character TRUE character TRUE ok as.character(...)
uuid character TRUE character TRUE ok as.character(...)
supplementaldata character TRUE character TRUE ok as.character(...)
serializedresult character TRUE character TRUE ok as.character(...)
# Columns to keep: the participant id, every column whose expected type is
# POSIXct, numeric, or Date (minus "status" and "scheduledate"), and the
# free-text comments
relevant_columns <- c(
  "record_id",
  setdiff(
    pull(
      filter(
        coltype_check$details,
        expected %in% c("POSIXct", "numeric", "Date")
      ),
      col
    ),
    c("status", "scheduledate")
  ),
  "comments"
)
# Reduce the data to those columns; names missing from the data are skipped
data <- select(data, any_of(relevant_columns))

Set sleep time zone

# Site code used to look up the time zone
site <- "FUSPCEU"
# Assign the site's time zone to every date-time column
# (`tzs` is not defined in this file; presumably it comes from the sourced
# helper scripts — confirm)
data <- mutate(
  data,
  across(where(is.POSIXct), \(stamp) force_tz(stamp, tzs[[site]]))
)
# Apply the codebook-driven helpers: first add_radio_factors(), then
# add_col_labels()
data <- add_col_labels(
  add_radio_factors(
    data,
    codebook,
    var_col = `Variable / Field Name`,
    type_col = `Field Type`,
    levels_col = `Choices, Calculations, OR Slider Labels`
  ),
  codebook,
  var_col = `Variable / Field Name`,
  label_col = `Field Label`
)
Warning in add_col_labels(add_radio_factors(data, codebook, var_col = `Variable
/ Field Name`, : Labels provided for variables not in `data`: uuid, startdate,
enddate, scheduledate, status, supplementaldata, serializedresult

Calculate sleep onset and sleep duration

# Derive sleep onset and sleep duration from the diary entries.
# `offset` is renamed to `wake`, and the reported `sleep` column is renamed to
# `sleepprep` so that `sleep` can hold the calculated sleep onset.
data <- 
  data |> 
  rename(wake = offset, sleepprep = sleep) |> 
  mutate(
    # sleep onset = `sleepprep` plus `sleepdelay`, taken as minutes
    sleep = sleepprep + dminutes(sleepdelay),
    # Fix the unit to hours. A plain `wake - sleep` lets R choose the unit from
    # the data, so the column could silently switch to minutes or days.
    sleep_duration = difftime(wake, sleep, units = "hours")
  )

# Labels for the two derived columns
attr(data$sleep, "label") <- "Sleep onset (calculated)"
attr(data$sleep_duration, "label") <- "Sleep duration (calculated)"

Ensure nighttime awake duration is zero when there are no awakenings

# Set the awake duration to 0 wherever zero awakenings were reported;
# all other values are left as they are
data <- mutate(
  data,
  awake_duration = replace_when(awake_duration, awakenings == 0 ~ 0)
)

Summarize results

# Print the descriptive summary table of the sleep diary
data |> table_sleepdiary()
Morning sleep diary N N = 149¹,²
What time did you get into bed? 149 23:07:00 (22:20:00, 00:00:00)
What time did you try to go to sleep? 149 23:30:00 (22:45:00, 00:30:00)
What time was your final awakening? i.,e. when did you wake up today? 149 07:45:00 (07:00:00, 09:07:00)
What time did you get out of bed for the day? 149 08:00:00 (07:16:00, 09:35:00)
How long did it take you to fall asleep? Please answer in minutes 149 10 (5, 25)
How many times did you wake up, not counting your final awakening? 149 1.00 (0.00, 2.00)
In total, how long did these awakenings last? Please answer in minutes 149 1 (0, 5)
How would you rate the quality of your sleep? 149
    Very poor
5 (3.4%)
    Poor
15 (10%)
    Fair
27 (18%)
    Good
65 (44%)
    Very good
37 (25%)
Today is... 149
    a free day
54 (36%)
    a work day
95 (64%)
Sleep onset (calculated) 149 23:50:00 (23:01:00, 00:40:00)
Sleep duration (calculated) 149 7.82 hours (7.10 hours, 8.83 hours)
1 time_median (nighttime_p25, nighttime_p75); time_median (daytime_p25, daytime_p75); Median (Q1, Q3); n (%)
2 Nighttime variables center on midnight, daytime variables on noon; median for time is based on circular time
# Save the summary table as a PNG. The target folder is created first (as the
# other export steps in this file do), so the export does not depend on the
# folder already existing.
if (!dir.exists("../output/tables/")) {
  dir.create("../output/tables/", recursive = TRUE)
}
gtsave(
  table_sleepdiary(data) |> as_gt(),
  filename = "../output/tables/table_sleepdiary.png",
  vwidth = 800
)
file:////var/folders/9p/326_k3kx43qbn_cyl1rqfhb00000gn/T//Rtmpo1S572/file38283326e149.html screenshot completed

Translate comments

Comments written in the participants' native language are translated into English with AI and later checked by a site researcher.

# library(ellmer)
# 
# #Providing the relevant codebook portions
# codebook_red <-
# codebook|>
#   pmap(~ paste(paste(names(codebook), c(...), sep = ": "), collapse = ", ")) |>
#   list_c() |>
#   paste0(collapse = "newline: ")
# 
# chat <- chat_openai(paste0("Copy content and make translations according to specific instructions. They represent answers to questionnaires in a scientific field study."))
# 
# #Providing the input
# data_red <-
# data|>
#   select(record_id, comments)
# data_red <-
#   data_red |>
#   pmap(~ paste(paste(names(data_red), c(...), sep = ": "), collapse = ", "))
# 
# #creating an output structure
# type_data <- type_object(
#   record_id = type_string("use the record_id information"),
#   comments = type_string("copy the original information here", required = FALSE),
#   comments_english = type_string("translation of the original information into english", required = FALSE)
# )
# 
# data_llm <-
# parallel_chat_structured(
#   chat,
#   data_red,
#   type = type_data,
#   rpm = 500,
#   max_active = 100
# )
# 
# #Ensure that no NA is caught as string
# data_llm <-
#   data_llm |>
#   mutate(comments = case_when(comments == "NA" ~ NA, .default = comments),
#          comments_english = case_when(comments_english == "NA" ~ NA, .default = comments_english))
# 
# #check that input and output are identical
# stopifnot("Input must by identical to output check" =
#             all(data_llm$comments == data$comments, na.rm = TRUE))
# stopifnot("Input must by identical to output check" =
#             all(data_llm$record_id == data$record_id, na.rm = TRUE))
# 
# data_llm <-
#   data_llm |>
#   distinct(record_id, comments, .keep_all = TRUE)
# 
# path <- "../data/AI_translations/"
# if(!dir.exists(path)) dir.create(path, recursive = TRUE)
# write_csv(data_llm, "../data/AI_translations/sleepdiary.csv")
# Read the stored AI translations. All columns are read as character so the
# join keys (`record_id`, `comments`) are never type-guessed (e.g. ids that
# look numeric). This also silences the column specification message.
data_llm <- 
  read_csv(
    "../data/AI_translations/sleepdiary.csv",
    col_types = cols(.default = col_character())
  )
Rows: 81 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): record_id, comments, comments_english

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Add the English translations to the diary data, matching on participant id
# and original comment. `relationship = "many-to-one"` makes the join fail
# loudly if the translation file holds duplicate keys, instead of silently
# duplicating diary rows.
data <- 
  data |> 
  left_join(
    data_llm,
    by = c("record_id", "comments"),
    relationship = "many-to-one"
  )
attr(data$comments_english, "label") <- "Comments (English translation)"

Sort by date

# Order the entries by participant and, within participant, by bedtime
data <- arrange(data, record_id, bedtime)

Export

# Rename the participant identifier column to `Id`
data <- rename(data, Id = record_id)
# Set the labels of the two calculated columns (again) before export
attr(data[["sleep"]], "label") <- "Sleep onset (calculated)"
attr(data[["sleep_duration"]], "label") <- "Sleep duration (calculated)"
# Store the result under the name it is saved with
sleepdiary <- data
# Create the export folder if needed, then write the RData file
path <- "../data/imported/continuous/"
if (!dir.exists(path)) {
  dir.create(path, recursive = TRUE)
}
save(sleepdiary, file = "../data/imported/continuous/sleepdiaries.RData")