Import wellbeing diary

Author

Johannes Zauner

Preface

This document imports the WHO-5 wellbeing diary and shows descriptive statistics for the site.

Setup

library(tidyverse)
Warning: package 'ggplot2' was built under R version 4.5.2
Warning: package 'tidyr' was built under R version 4.5.2
Warning: package 'dplyr' was built under R version 4.5.2
library(LightLogR)
library(glue)
library(readxl)
library(gt)
library(gtsummary)

# Base URL of the shared helper scripts (raw files on the `main` branch).
remote <-
  "https://raw.githubusercontent.com/MeLiDosProject/Data_Metadata_Conventions/main/scripts/"

# Build the full URL "<remote><name>.R" for every helper script and source
# them one after the other, in the order listed.
paste0(
  remote,
  c(
    "labeling",
    "radio_factors",
    "time_summaries",
    "prepare_codebook",
    "filefinder",
    "add_label",
    "who5_scoring",
    "general_parameters",
    "coltype_checker",
    "diarydate",
    "tables"
  ),
  ".R"
) |>
  walk(source)
Warning: package 'rlang' was built under R version 4.5.2

Preparation

# Codebook: built from the data dictionary; `form.filter` names the forms used
codebook <- prepare_codebook(
  "MeLiDosEveningDiaries_DataDictionary_2024-10-16.csv",
  form.filter = c("wellbeing_diary", "form_1")
)

# Locate the wellbeing diary files
files <- filefinder("wellbeingdiary", continuous = TRUE, individual = TRUE)

# Import the first batch of files. Their timestamps are parsed as
# day-month-year hour:minute. Rows without a repeat instance are dropped and
# the record id gets the "MPI_S" prefix.
data1 <-
  files[c(1:3, 6:26)] |>
  read_csv2(show_col_types = FALSE) |>
  drop_na(redcap_repeat_instance) |>
  mutate(
    record_id = paste0("MPI_S", record_id),
    across(
      c(startdate_2, enddate_2),
      \(stamp) parse_date_time(stamp, orders = "dmyHM")
    )
  )
ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
# Import the second batch of files (entries 4 and 5). Their timestamps are
# parsed as year-month-day hour:minute:second. Rows without a repeat instance
# are dropped and the record id gets the "MPI_S" prefix.
data2 <-
  files[4:5] |>
  read_csv2(show_col_types = FALSE) |>
  drop_na(redcap_repeat_instance) |>
  mutate(
    record_id = paste0("MPI_S", record_id),
    across(
      c(startdate_2, enddate_2),
      \(stamp) parse_date_time(stamp, orders = "ymdHMS")
    )
  )
ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
# Stack both batches and order the rows by record id and repeat instance
data <- arrange(rbind(data1, data2), record_id, redcap_repeat_instance)

# Check the imported column types against the codebook and show the details
coltype_check <- coltype_checker(codebook, data)
gt(coltype_check$details)
col expected present actual type_ok issue expected_example
introduction_wellbeing numeric TRUE numeric TRUE ok as.numeric(...)
who5_1998_1 numeric TRUE numeric TRUE ok as.numeric(...)
who5_1998_2 numeric TRUE numeric TRUE ok as.numeric(...)
who5_1998_3 numeric TRUE numeric TRUE ok as.numeric(...)
who5_1998_4 numeric TRUE numeric TRUE ok as.numeric(...)
who5_1998_5 numeric TRUE numeric TRUE ok as.numeric(...)
status_2 numeric FALSE NA FALSE missing as.numeric(...)
startdate_2 Date TRUE POSIXct FALSE wrong_type as.Date(...)
enddate_2 Date TRUE POSIXct FALSE wrong_type as.Date(...)
scheduledate_2 Date FALSE NA FALSE missing as.Date(...)
record_id character TRUE character TRUE ok as.character(...)
uuid_2 character FALSE NA FALSE missing as.character(...)
supplementaldata_2 character FALSE NA FALSE missing as.character(...)
serializedresult_2 character FALSE NA FALSE missing as.character(...)
# Names of all columns listed in the column type check
relevant_columns <- coltype_check$details[["col"]]

# Keep only those of them that are present in the data
data <- select(data, any_of(relevant_columns))

# Label variables: factor levels and column labels are taken from the
# codebook. Afterwards `record_id` and the codebook variables are moved to the
# front, and everything from `enddate_2` to the last column, plus
# `introduction_wellbeing`, is dropped.
data <-
  data |>
  add_radio_factors(
    codebook,
    var_col = `Variable / Field Name`,
    type_col = `Field Type`,
    levels_col = `Choices, Calculations, OR Slider Labels`
  ) |>
  add_col_labels(
    codebook,
    var_col = `Variable / Field Name`,
    label_col = `Field Label`
  ) |>
  relocate(record_id, any_of(codebook$`Variable / Field Name`)) |>
  select(-c(enddate_2:last_col(), introduction_wellbeing))
Warning in add_col_labels(add_radio_factors(data, codebook, var_col = `Variable
/ Field Name`, : Labels provided for variables not in `data`: uuid_2,
scheduledate_2, status_2, supplementaldata_2, serializedresult_2

Set relevant dates

# Assign every entry to a diary day with diarydate(), based on `startdate_2`.
# Intended rule: entries collected between 14:00 and 24:00 belong to the same
# day; entries collected between 00:00 and 13:59 belong to the previous day.

data <- diarydate(data, startdate_2)
# Give the resulting `Date` column a label
attr(data[["Date"]], "label") <- "Date"

Calculate WHO5 score

# Run the WHO-5 scoring helper on the diary data
data <- who5_scoring(data)

Summarize results

# Show the summary table of the wellbeing diary
data |> table_wellbeingdiary()
Wellbeing diary (WHO-5) N N = 180¹
I have felt cheerful and in good spirits 180
    At no time
1 (0.6%)
    Some of the time
18 (10%)
    Less than half of the time
23 (13%)
    More than half of the time
62 (34%)
    Most of the time
61 (34%)
    All of the time
15 (8.3%)
I have felt calm and relaxed 180
    At no time
1 (0.6%)
    Some of the time
11 (6.1%)
    Less than half of the time
30 (17%)
    More than half of the time
57 (32%)
    Most of the time
74 (41%)
    All of the time
7 (3.9%)
I have felt active and vigorous 180
    At no time
8 (4.4%)
    Some of the time
21 (12%)
    Less than half of the time
33 (18%)
    More than half of the time
63 (35%)
    Most of the time
46 (26%)
    All of the time
9 (5.0%)
How would you rate the quality of your sleep last night? 180
    Very poor
8 (4.4%)
    Poor
20 (11%)
    Fair
67 (37%)
    Good
71 (39%)
    Very good
14 (7.8%)
My daily life has been filled with things that interest me 180
    At no time
1 (0.6%)
    Some of the time
7 (3.9%)
    Less than half of the time
21 (12%)
    More than half of the time
59 (33%)
    Most of the time
59 (33%)
    All of the time
33 (18%)
WHO-5 percentage score (calculated, 0-100) 180 68 (56, 76)
Item 4 deviates from the WHO-5 questionnaire. For the score, it is coded as follows: 1: Very poor | 2: Poor | 3: Fair | 4: Good | 5: Very good.
¹ n (%); Median (Q1, Q3)
# Save the summary table as a PNG. The target folder is created first when it
# is missing, so the export does not depend on the folder already existing.
table_dir <- "../output/tables"
if (!dir.exists(table_dir)) {
  dir.create(table_dir, recursive = TRUE)
}
gtsave(
  table_wellbeingdiary(data),
  filename = file.path(table_dir, "table_wellbeingdiary.png"),
  vwidth = 800
)
file:////var/folders/9p/326_k3kx43qbn_cyl1rqfhb00000gn/T//RtmpPW4qWE/file12b5d41e23063.html screenshot completed

Export

# Rename the identifier column and store the data under its export name
data <- data |> rename(Id = record_id)
wellbeingdiary <- data

# Create the export folder if needed and save into that same folder. The file
# path is derived from `path` (which ends with a slash) so that the folder
# being created and the folder being written to cannot drift apart.
path <- "../data/imported/continuous/"
if (!dir.exists(path)) {
  dir.create(path, recursive = TRUE)
}
save(wellbeingdiary, file = paste0(path, "wellbeingdiary.RData"))