Import mHLEA

Author

Johannes Zauner

Preface

This document imports the mHLEA evening diary (light exposure) and shows descriptive statistics for the site.

Setup

library(tidyverse)
library(LightLogR)
Warning: package 'LightLogR' was built under R version 4.5.2
library(glue)
library(readxl)
library(gt)
library(gtsummary)
source("../project_globals.R")
# Base URL of the shared helper scripts that are sourced below.
remote <-
  "https://raw.githubusercontent.com/MeLiDosProject/Data_Metadata_Conventions/main/scripts/"

# Build the full URL of every helper script and source them one after another.
paste0(
  remote,
  c(
    "labeling",
    "radio_factors",
    "time_summaries",
    "prepare_codebook",
    "filefinder",
    "add_label",
    "who5_scoring",
    "general_parameters",
    "coltype_checker",
    "diarydate",
    "tables"
  ),
  ".R"
) |>
  walk(\(script_url) source(script_url))

Preparation

# Collect the codebook, restricted to the two forms named in `form.filter`.
codebook <- prepare_codebook(
  "MeLiDosEveningDiaries_DataDictionary_2024-10-16.csv",
  form.filter = c("light_exposure_diary", "form_1")
)
# Collect the digital mHLEA files.
files <- filefinder("mHLEA_digital", continuous = TRUE, individual = TRUE)
# Import the files:
# - rows without a `redcap_repeat_instance` are dropped
# - start/end timestamps are parsed, accepting both "ymdHMS" and "dmyHM" input
# - the record id gets the site prefix; `site` is used instead of a hard-coded
#   prefix so the ids are built the same way as for the paper diaries
#   (NOTE(review): `site` is presumably defined in ../project_globals.R)
data <-
  read_csv2(files, show_col_types = FALSE) |>
  drop_na(redcap_repeat_instance) |>
  mutate(
    across(
      c(startdate, enddate),
      \(x) parse_date_time(x, c("ymdHMS", "dmyHM"))
    ),
    record_id = paste0(site, "_S", record_id)
  )
ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
# Check the imported columns against the codebook and display the per-column details.
coltype_check <- coltype_checker(codebook, data)
gt(coltype_check$details)
col expected present actual type_ok issue expected_example
mhla_confirm numeric TRUE numeric TRUE ok as.numeric(...)
mhlea_confidence numeric TRUE numeric TRUE ok as.numeric(...)
status numeric FALSE NA FALSE missing as.numeric(...)
scheduledate Date FALSE NA FALSE missing as.Date(...)
record_id character TRUE character TRUE ok as.character(...)
uuid character FALSE NA FALSE missing as.character(...)
supplementaldata character FALSE NA FALSE missing as.character(...)
serializedresult character FALSE NA FALSE missing as.character(...)
# Relevant columns: every column listed in the column-type check, plus `startdate`.
relevant_columns <- union(pull(coltype_check$details, col), "startdate")
# Keep only those columns; names that are absent from the data are skipped.
data <- select(data, any_of(relevant_columns))
# Apply the codebook-driven factor levels and field labels (helpers sourced in
# the setup), then reduce the data to identifier, start time and confidence rating.
data <-
  data |>
  add_radio_factors(
    codebook,
    var_col = `Variable / Field Name`,
    type_col = `Field Type`,
    levels_col = `Choices, Calculations, OR Slider Labels`
  ) |>
  add_col_labels(
    codebook,
    var_col = `Variable / Field Name`,
    label_col = `Field Label`
  ) |>
  select(record_id, startdate, mhlea_confidence)
Warning in add_col_labels(add_radio_factors(data, codebook, var_col = `Variable
/ Field Name`, : Labels provided for variables not in `data`: uuid, enddate,
scheduledate, status, supplementaldata, serializedresult

Set relevant dates

# Assign every entry to a diary day:
# - collected between 14:00 and 24:00 -> same day
# - collected between 00:00 and 13:59 -> previous day
data <- diarydate(data, startdate)
attr(data[["Date"]], "label") <- "Date"

Import paper diaries

# Collect the paper diary files, leaving out every path that contains "upload".
files <- filefinder("mHLEA_paper", continuous = TRUE, individual = TRUE)
files <- files[which(!str_detect(files, "upload"))]
# Record ids: site prefix plus the first three characters of each file name.
file_ids <- paste0(site, "_S", str_extract(basename(files), ".{3}"))
files <- set_names(files, file_ids)
# Import the files and harmonise their columns; sheets without rows become NULL.
data_paper <-
  files |>
  map(read_excel) |>
  map(\(sheet) {
    if (nrow(sheet) == 0) {
      return(NULL)
    }
    # Add the activity columns where a sheet does not have them
    if (!("activity" %in% names(sheet))) {
      sheet <- mutate(sheet, activity = NA_character_)
    }
    if (!("activity_desc" %in% names(sheet))) {
      sheet <- mutate(sheet, activity_desc = NA_character_)
    }
    # Sheets without `lightsource` carry two separate light columns instead;
    # paste them into one (missing values turn into the text "NA" here)
    if (!("lightsource" %in% names(sheet))) {
      sheet <-
        sheet |>
        mutate(lightsource = paste(main_light, second_light)) |>
        select(-c(main_light, second_light))
    }
    mutate(sheet, activity = as.character(activity))
  })
# Drop the empty sheets, stack the rest and derive the one-hour interval columns.
data_paper <-
  data_paper[!map_lgl(data_paper, is.null)] |>
  list_rbind(names_to = "record_id") |>
  mutate(
    timestamp = force_tz(round_date(timestamp, unit = "hour"), tzs[[site]]),
    Date = date(timestamp),
    start = timestamp,
    end = timestamp + dhours(1),
    .before = timestamp
  ) |>
  select(-timestamp) |>
  # Merge both free-text columns; an empty result is stored as missing
  unite(
    "activity_desc",
    activity_desc,
    activity_specify,
    na.rm = TRUE,
    sep = ""
  ) |>
  mutate(activity_desc = na_if(activity_desc, ""))

Combine paper diaries with confidence rating

# Attach the confidence rating to the paper diary rows of the same record and day.
data <- left_join(data_paper, data, by = c("record_id", "Date"))
data <- relocate(data, startdate, .after = last_col())
# Clean the light source codes: X/x is recoded to D, the text "NA" is removed,
# and an entry that is only a single blank is stored as missing.
data <- mutate(
  data,
  lightsource = lightsource |>
    str_replace_all("X|x", "D") |>
    str_remove_all("NA"),
  lightsource = case_when(lightsource != " " ~ lightsource)
)

Translate comments into English

Free-text activity descriptions in the native language are translated into English with AI and later checked by a site researcher.

# 
# library(ellmer)
# 
# #Providing the relevant codebook portions
# chat <- chat_openai(paste0("Clean the dataset according to the instructions in the output structure."))
# 
# #Providing the input
# data_red1 <-
# data|>
#   select(record_id, start, activity_desc) |>
#   filter(!is.na(activity_desc))
# 
# data_red <- data_red1 |>
#   pmap(~ paste(paste(names(data_red1), c(...), sep = ": "), collapse = ", "))
# 
# #creating an output structure
# type_data <- type_object(
#   record_id = type_string("copy the record_id information"),
#   start = type_string("copy the start information as 'YYYY-MM-DD HH:MM:SS'"),
#   activity_desc_english = type_string("Translate (or if in english already, copy) the 'activity_desc' column here. Question: Please specify your activity.", required = FALSE),
# )
# 
# data_llm <-
# parallel_chat_structured(
#   chat,
#   data_red,
#   type = type_data
# )
# 
# #Ensure that no NA is caught as string
# data_llm <-
#   data_llm |>
#   mutate(across(everything(), \(x) case_when(x == "NA" ~ NA, .default = x)))
# 
# #check that input and output are identical
# stopifnot("Input must be identical to output check" =
#             all(data_llm$record_id == data_red$record_id, na.rm = TRUE))
# stopifnot("Input must be identical to output check" =
#             all(data_llm$start == data_red$start, na.rm = TRUE))
# 
# data_llm$start <- data_red1$start
# 
# data_llm$activity_desc <- data_red1$activity_desc
# 
# path <- "../data/AI_translations/"
# if(!dir.exists(path)) dir.create(path, recursive = TRUE)
# write_csv(data_llm, "../data/AI_translations/lightexposurediary.csv")
# Read the stored translations and keep one English translation per original
# text. The column specification message is silenced, as for the diary import.
data_llm <-
  read_csv(
    "../data/AI_translations/lightexposurediary.csv",
    show_col_types = FALSE
  ) |>
  select(activity_desc, activity_desc_english) |>
  distinct(activity_desc, .keep_all = TRUE)
Rows: 54 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (3): record_id, activity_desc_english, activity_desc
dttm (1): start

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Add the English translation to the diary data and label the new column.
# `data_llm` holds one row per `activity_desc`, so several diary rows may share
# a translation but no diary row may be duplicated; `relationship` makes the
# join fail loudly should that ever stop being true.
data <-
  data |>
  left_join(data_llm, by = "activity_desc", relationship = "many-to-one") |>
  mutate(
    across(
      ends_with("english"),
      \(x) add_label(x, "Please specify your activity (English translation)")
    )
  )

Dissecting answers

# Split the free-form light source and activity codes into one logical flag per
# category, plus a primary and a secondary light source.
data <-
  data |>
  relocate(lightsource, activity, .after = end) |>
  # Drop rows where both the light source and the activity are missing
  filter(!(is.na(lightsource) & is.na(activity))) |>
  mutate(
    # Keep letters only, collapse runs of the same character, recode "DW" to "W"
    lightsource = lightsource |>
      str_remove_all("[^[:alpha:]]") |>
      str_replace_all("(.)\\1+", "\\1") |>
      str_replace("DW", "W"),
    # One flag per light source letter, matched in either case
    light_electric_indoor = str_detect(lightsource, "L|l"),
    light_electric_outdoor = str_detect(lightsource, "S|s"),
    light_daylight_indoor = str_detect(lightsource, "I|i"),
    light_daylight_outdoor = str_detect(lightsource, "O|o"),
    light_display = str_detect(lightsource, "E|e"),
    light_sleep_darkness = str_detect(lightsource, "D|d"),
    light_sleep_imission = str_detect(lightsource, "W|w"),
    .after = lightsource
  ) |>
  # First letter becomes the primary, second letter the secondary light source;
  # any further letters are dropped
  separate_wider_position(
    lightsource,
    widths = c(primary = 1, secondary = 1),
    names_sep = "_",
    too_few = "align_start",
    too_many = "drop"
  ) |>
  mutate(
    # Keep digits only, then set one flag per activity digit
    activity = str_remove_all(activity, "[^[:digit:]]"),
    act_sleep = str_detect(activity, "1"),
    act_home = str_detect(activity, "2"),
    act_road_vehicle = str_detect(activity, "3"),
    act_road_open = str_detect(activity, "4"),
    act_working_indoor = str_detect(activity, "5"),
    act_working_outdoor = str_detect(activity, "6"),
    act_free_outdoor = str_detect(activity, "7"),
    act_other = str_detect(activity, "8"),
    .after = activity
  ) |>
  select(!activity)

Adding labels:

# Lookup from the single-letter light source code (names) to its description (values).
factor_levels_mHLEA <- setNames(
  c(
    "Electric light source indoors",
    "Electric light source outdoors",
    "Daylight indoors",
    "Daylight outdoors (including shade)",
    "Emissive display light",
    "Darkness during sleep",
    "Light entering from outside during sleep"
  ),
  c("L", "S", "I", "O", "E", "D", "W")
)
# Convert the light source code columns into factors: codes are upper-cased and
# matched against the letters in `factor_levels_mHLEA`, which also supplies the labels.
data <-
  mutate(
    data,
    across(
      contains("lightsource"),
      \(code) factor(
        str_to_upper(code),
        levels = names(factor_levels_mHLEA),
        labels = factor_levels_mHLEA
      )
    )
  )
# Variable labels, keyed by column name. Every key has to match a column of
# `data` exactly to take effect; the free-text columns are therefore keyed as
# `activity_desc` / `activity_desc_english`, which are the names the data
# actually carries (the former `act_desc*` keys matched no column).
labels_mHLEA <-
  c(
    record_id = "Record ID",
    Date = "Date",
    start = "Beginning timestamp",
    end = "Ending timestamp",
    lightsource_primary = "Primary lightsource (mH-LEA)",
    lightsource_secondary = "Secondary lightsource (mH-LEA)",
    light_electric_indoor = "Electric light source indoors (mH-LEA)",
    light_electric_outdoor = "Electric light source outdoors (mH-LEA)",
    light_daylight_indoor = "Daylight indoors (mH-LEA)",
    light_daylight_outdoor = "Daylight outdoors (including shade) (mH-LEA)",
    light_display = "Emissive display light (mH-LEA)",
    light_sleep_darkness = "Darkness during sleep (mH-LEA)",
    light_sleep_imission = "Light entering from outside during sleep (mH-LEA)",
    act_sleep = "Sleeping in bed (activity)",
    act_home = "Awake at home (activity)",
    act_road_vehicle = "On the road with public transport/car (activity)",
    act_road_open = "On the road with bike/on foot (activity)",
    act_working_indoor = "Working in the office/from home (activity)",
    act_working_outdoor = "Working outdoors (including lunch break outdoors) (activity)",
    act_free_outdoor = "Free time outdoors (activity)",
    act_other = "Other (activity)",
    activity_desc = "Other activity description",
    activity_desc_english = "Other activity (english translation)",
    startdate = "Starting time to fill in questionnaire"
  )

# Attach the variable labels collected in `labels_mHLEA` to the data.
data <- data |> add_labels(labels_mHLEA)

Summarize results

# Summary table of the diary variables; timestamps, dates and the free-text
# columns are left out before summarising.
table <-
  data |>
  select(
    !c(start, end, Date, activity_desc, activity_desc_english, startdate)
  ) |>
  table_general("Light exposure (mH-LEA) and activity diary")
table
Light exposure (mH-LEA) and activity diary N N = 4,1611
Primary lightsource (mH-LEA) 4,161
    Electric light source indoors
1,041 (25%)
    Electric light source outdoors
55 (1.3%)
    Daylight indoors
1,264 (30%)
    Daylight outdoors (including shade)
320 (7.7%)
    Emissive display light
67 (1.6%)
    Darkness during sleep
1,103 (27%)
    Light entering from outside during sleep
311 (7.5%)
Secondary lightsource (mH-LEA) 1,464
    Electric light source indoors
316 (22%)
    Electric light source outdoors
19 (1.3%)
    Daylight indoors
145 (9.9%)
    Daylight outdoors (including shade)
82 (5.6%)
    Emissive display light
868 (59%)
    Darkness during sleep
31 (2.1%)
    Light entering from outside during sleep
3 (0.2%)
    missing
2,697
Electric light source indoors (mH-LEA) 4,161 1,357 (33%)
Electric light source outdoors (mH-LEA) 4,161 74 (1.8%)
Daylight indoors (mH-LEA) 4,161 1,409 (34%)
Daylight outdoors (including shade) (mH-LEA) 4,161 402 (9.7%)
Emissive display light (mH-LEA) 4,161 935 (22%)
Darkness during sleep (mH-LEA) 4,161 1,134 (27%)
Light entering from outside during sleep (mH-LEA) 4,161 314 (7.5%)
Sleeping in bed (activity) 2,129 682 (32%)
    missing
2,032
Awake at home (activity) 2,129 629 (30%)
    missing
2,032
On the road with public transport/car (activity) 2,129 76 (3.6%)
    missing
2,032
On the road with bike/on foot (activity) 2,129 66 (3.1%)
    missing
2,032
Working in the office/from home (activity) 2,129 567 (27%)
    missing
2,032
Working outdoors (including lunch break outdoors) (activity) 2,129 15 (0.7%)
    missing
2,032
Free time outdoors (activity) 2,129 62 (2.9%)
    missing
2,032
Other (activity) 2,129 82 (3.9%)
    missing
2,032
How sure are you about the light exposure categories you chose? 267
    Not confident at all
0 (0%)
    Slightly confident
8 (3.0%)
    Somewhat confident
48 (18%)
    Fairly confident
106 (40%)
    Completely confident
105 (39%)
    missing
3,894
1 n (%)
# Save the summary table as an image. The target folder is created first when
# it does not exist yet, mirroring the directory check of the data export.
table_dir <- "../output/tables"
if (!dir.exists(table_dir)) {
  dir.create(table_dir, recursive = TRUE)
}
gtsave(
  table |> as_gt(),
  filename = file.path(table_dir, "table_lightexposurediary.png"),
  vwidth = 800
)
file:////var/folders/9p/326_k3kx43qbn_cyl1rqfhb00000gn/T//RtmpeE9oT6/fileacec3de5fecf.html screenshot completed

Export

# Export: rename the identifier column to `Id` and store the data under its
# final object name.
data <- data |> rename(Id = record_id)
lightexposurediary <- data
# The save target is derived from `path`, so the folder that is checked/created
# is always the folder that is written to.
path <- "../data/imported/continuous/"
if (!dir.exists(path)) {
  dir.create(path, recursive = TRUE)
}
save(lightexposurediary, file = paste0(path, "lightexposurediary.RData"))