---
title: "Import sleep diaries"
author: "Johannes Zauner"
format:
  html:
    self-contained: true
    code-tools: true
---
## Preface
This document imports the `sleepdiaries` data (morning sleep diaries) and shows descriptive statistics for the site.
## Setup
```{r}
#| message: false
library(tidyverse)
library(LightLogR)
library(glue)
library(readxl)
library(gt)
library(gtsummary)
# Base URL of the remote folder that holds the shared helper scripts.
remote <-
  "https://raw.githubusercontent.com/MeLiDosProject/Data_Metadata_Conventions/main/scripts/"

# Build the full URL of every helper script and source them one by one.
# Only the side effects of `source()` are of interest, hence `walk()`.
paste0(
  remote,
  c(
    "labeling",
    "radio_factors",
    "time_summaries",
    "prepare_codebook",
    "filefinder",
    "general_parameters",
    "coltype_checker",
    "tables"
  ),
  ".R"
) |>
  walk(source)
```
Collect the necessary external information (the codebook / data dictionary of the morning sleep diary):
```{r}
# Read the data dictionary of the morning sleep diary and strip the HTML
# tags that wrap some of the field labels, leaving plain-text labels.
codebook <-
  read_csv(
    "https://raw.githubusercontent.com/MeLiDosProject/Data_Metadata_Conventions/main/codebook/MeLiDosMorningSleepDiaries_DataDictionary_2024-10-16.csv",
    show_col_types = FALSE
  ) |>
  mutate(
    `Field Label` = str_remove_all(
      `Field Label`,
      "<div class=\"rich-text-field-label\">|<p>|<em>|</em>|</p>|</div>"
    )
  )
```
## Collect files
The following files contain sleep-diary information.
```{r}
# Path to the participant folders
# (presumably one subfolder per participant; verify against the data layout).
path_part1 <- "../data/raw/individual"
# Path to the questionnaire, relative to each participant folder.
path_part2 <- "/continuous/sleepdiary"
# All entries found directly below the participant path.
folders <- dir(path_part1)
# Complete folder names: <path_part1>/<folder><path_part2>.
paths <- glue("{path_part1}/{folders}{path_part2}")
# All files inside those folders, with their full paths.
files <- list.files(paths, full.names = TRUE)

# `dir()` and `list.files()` silently return an empty vector when a folder is
# missing or empty. Stop here with a clear message instead of failing later
# during the import with an unrelated error.
if (length(files) == 0) {
  stop(
    "No sleep-diary files found in '", path_part1,
    "/<participant>", path_part2, "'.",
    call. = FALSE
  )
}
```
## Import files
```{r}
# Read all diary files into one table (`read_csv2()`: semicolon-separated
# values with a decimal comma) and keep only rows that carry a repeat instance.
sleepdiary <- read_csv2(files, show_col_types = FALSE)
sleepdiary <- drop_na(sleepdiary, redcap_repeat_instance)

sleepdiary <-
  mutate(
    sleepdiary,
    # Date-times are stored as day-month-year hour:minute text; they are parsed
    # as UTC here and receive their time zone in a later step.
    across(
      c(sleep, bedtime, offset, out_ofbed),
      function(datetime) parse_date_time(datetime, orders = "dmyHM", tz = "UTC")
    ),
    # Prefix the record number with the site/participant identifier.
    record_id = paste0("MPI_S", record_id),
    # Extract the numeric part of the awakening columns.
    across(
      c(awakenings, awake_duration),
      function(value) parse_number(value)
    ),
    # Collapse the indicator columns into one code by weighting each column
    # with its index (assumes at most one indicator is 1 per row - TODO confirm).
    sleepquality =
      sleepquality___1 +
      2 * sleepquality___2 +
      3 * sleepquality___3 +
      4 * sleepquality___4 +
      5 * sleepquality___5,
    daytype2 = daytype2___1 + 2 * daytype2___2
  )
```
### Check column types
```{r}
# Columns that must exist in the dataset, grouped by the type they must have.
should_POSIXct <- c("bedtime", "sleep", "offset", "out_ofbed")
should_numeric <- c(
  "sleepdelay", "awakenings", "awake_duration", "sleepquality", "daytype2"
)
should_character <- c("record_id", "comments")

# For each type: collect the names of all columns of that type and require
# that every expected column is among them. A missing column and a column of
# the wrong type both trigger the same message.
stopifnot(
  "all of should_POSIXct need to be part of the dataset and of type POSIXct" =
    all(should_POSIXct %in% names(select(sleepdiary, where(is.POSIXct)))),
  "all of should_numeric need to be part of the dataset and of type numeric" =
    all(should_numeric %in% names(select(sleepdiary, where(is.numeric)))),
  "all of should_character need to be part of the dataset and of type character" =
    all(should_character %in% names(select(sleepdiary, where(is.character))))
)
```
### Select relevant variables & non-empty rows
```{r}
# Keep only the checked columns, ordered as: character columns first,
# then date-time columns, then numeric columns.
sleepdiary <- select(
  sleepdiary,
  all_of(should_character),
  all_of(should_POSIXct),
  all_of(should_numeric)
)
```
### Set sleep time zone
```{r}
# Site identifier used to look up the time zone.
site <- "MPI"

# Replace the time zone of every date-time column with the site's time zone.
# `force_tz()` keeps the clock time and only changes the zone it refers to.
# `tzs` is not defined in this document; presumably it comes from the sourced
# helper scripts - verify.
sleepdiary <- mutate(
  sleepdiary,
  across(
    where(is.POSIXct),
    function(datetime) force_tz(datetime, tzs[[site]])
  )
)
```
### Label variables
```{r}
# Step 1: apply the codebook's choice definitions to the matching variables.
# `add_radio_factors()` is not defined in this document (presumably one of the
# sourced helper scripts - verify).
sleepdiary <- add_radio_factors(
  sleepdiary,
  codebook,
  var_col = `Variable / Field Name`,
  type_col = `Field Type`,
  levels_col = `Choices, Calculations, OR Slider Labels`
)

# Step 2: attach the codebook's field labels to the columns.
# `add_col_labels()` is likewise defined outside this document.
sleepdiary <- add_col_labels(
  sleepdiary,
  codebook,
  var_col = `Variable / Field Name`,
  label_col = `Field Label`
)
```
### Calculate sleep onset and sleep duration
```{r}
# Derive sleep onset and sleep duration.
# - `offset` is renamed to `wake`; the original `sleep` column is renamed to
#   `sleepprep`, because `sleep` is recomputed as the actual sleep onset.
# - Sleep onset = `sleepprep` plus `sleepdelay`, which is interpreted as minutes.
# - Sleep duration = time from sleep onset to wake, in hours.
sleepdiary <-
  sleepdiary |>
  rename(wake = offset, sleepprep = sleep) |>
  mutate(
    sleep = sleepprep + dminutes(sleepdelay),
    # Request hours explicitly. Subtracting two date-times picks the unit
    # automatically (secs/mins/hours/days) from the smallest difference in the
    # data, so dividing the result by 60 is only correct when that unit happens
    # to be minutes.
    sleep_duration = difftime(wake, sleep, units = "hours")
  )

attr(sleepdiary$sleep, "label") <- "Sleep onset (calculated)"
attr(sleepdiary$sleep_duration, "label") <- "Sleep duration (calculated)"
# The "units" attribute of `sleep_duration` is already "hours" (set by
# `difftime()` above), so it no longer needs to be overwritten by hand.
```
### Ensure nighttime awake duration is zero when there are no awakenings
```{r}
# A night without awakenings cannot have any time spent awake: set
# `awake_duration` to 0 wherever `awakenings` is 0 and leave all other
# values untouched.
sleepdiary <- mutate(
  sleepdiary,
  awake_duration = replace_when(
    awake_duration,
    awakenings == 0 ~ 0
  )
)
```
## Summarize diary results
```{r}
# Descriptive summary of all diary variables except the free-text comments and
# the participant identifier.
# The `time_median`, `nighttime_*` and `daytime_*` statistics are not defined
# in this document (presumably in the sourced helper scripts - verify).
table_sleep <-
  sleepdiary |>
  tbl_summary(
    include = -c(comments, record_id),
    statistic = list(
      all_continuous() ~ "{median} ({p25}, {p75})",
      all_categorical() ~ "{n} ({p}%)",
      c(bedtime, sleep, sleepprep) ~
        "{time_median} ({nighttime_p25}, {nighttime_p75})",
      c(wake, out_ofbed) ~ "{time_median} ({daytime_p25}, {daytime_p75})"
    ),
    # Summarise the number of awakenings as a continuous variable.
    type = awakenings ~ "continuous",
    missing_text = "missing"
  ) |>
  add_n() |>
  bold_labels() |>
  modify_header(label = "**Morning sleep diary**") |>
  modify_footnote_header(
    "Nighttime variables center on midnight, daytime variables on noon; median for time is based on circular time",
    columns = stat_0,
    replace = FALSE
  )
table_sleep

# Save the table as an image. Create the output folder first if it does not
# exist yet, as is done for the other outputs of this document.
table_file <- "../output/tables/table_sleepdiary.png"
if (!dir.exists(dirname(table_file))) {
  dir.create(dirname(table_file), recursive = TRUE)
}
gtsave(as_gt(table_sleep), filename = table_file)
```
### Translate comments
Comments written in the participants' native language are translated into English with AI and later checked by a site researcher.
```{r}
#| label: translate into english with AI
#
# library(ellmer)
#
# #Providing the relevant codebook portions
# codebook_red <-
# codebook|>
# pmap(~ paste(paste(names(codebook), c(...), sep = ": "), collapse = ", ")) |>
# list_c() |>
# paste0(collapse = "newline: ")
#
# chat <- chat_openai(paste0("Copy content and make translations according to specific instructions. They represent answers to questionnaires in a scientific field study."))
#
# #Providing the input
# data_red <-
# sleepdiary|>
# select(record_id, comments)
# data_red <-
# data_red |>
# pmap(~ paste(paste(names(data_red), c(...), sep = ": "), collapse = ", "))
#
# #creating an output structure
# type_data <- type_object(
# record_id = type_string("use the record_id information"),
# comments = type_string("copy the original information here", required = FALSE),
# comments_english = type_string("translation of the original information into english", required = FALSE)
# )
#
# data_llm <-
# parallel_chat_structured(
# chat,
# data_red,
# type = type_data,
# rpm = 500,
# max_active = 100
# )
#
# #Ensure that no NA is caught as string
# data_llm <-
# data_llm |>
# mutate(comments = case_when(comments == "NA" ~ NA, .default = comments),
# comments_english = case_when(comments_english == "NA" ~ NA, .default = comments_english))
#
# #check that input and output are identical
# stopifnot("Input must by identical to output check" =
# all(data_llm$comments == sleepdiary$comments, na.rm = TRUE))
# stopifnot("Input must by identical to output check" =
# all(data_llm$record_id == sleepdiary$record_id, na.rm = TRUE))
#
# data_llm <-
# data_llm |>
# distinct(record_id, comments, .keep_all = TRUE)
#
# path <- "../data/AI_translations/"
# if(!dir.exists(path)) dir.create(path, recursive = TRUE)
# write_csv(data_llm, "../data/AI_translations/sleepdiary.csv")
```
```{r}
# Read the stored AI translations. All columns are read as text so that the
# join keys keep the same type as in `sleepdiary` regardless of what the file
# happens to contain. This also suppresses the column-specification message.
data_llm <-
  read_csv(
    "../data/AI_translations/sleepdiary.csv",
    col_types = cols(.default = col_character())
  )

# Add the translations to the original data. Every diary row may match at most
# one translation row; `relationship = "many-to-one"` makes the join fail
# instead of silently duplicating diary rows if the translation file contains
# duplicated (record_id, comments) pairs.
sleepdiary <-
  sleepdiary |>
  left_join(
    data_llm,
    by = c("record_id", "comments"),
    relationship = "many-to-one"
  )
attr(sleepdiary$comments_english, "label") <- "Comments (English translation)"
```
### Sort by date
```{r}
# Order the entries by participant and then by bedtime.
sleepdiary <- arrange(sleepdiary, record_id, bedtime)
# Place the original comments directly after `daytype2`.
sleepdiary <- dplyr::relocate(sleepdiary, comments, .after = daytype2)
```
### Export
```{r}
# The exported dataset uses `Id` as the name of the participant identifier.
sleepdiary <- rename(sleepdiary, Id = record_id)

# (Re-)set the labels of the two calculated columns before saving.
attr(sleepdiary$sleep, "label") <- "Sleep onset (calculated)"
attr(sleepdiary$sleep_duration, "label") <- "Sleep duration (calculated)"

# Make sure the target folder exists, then store the dataset as .RData.
path <- "../data/imported/continuous/"
if (!dir.exists(path)) {
  dir.create(path, recursive = TRUE)
}
save(sleepdiary, file = "../data/imported/continuous/sleepdiaries.RData")
```