Import wearlog

Author

Johannes Zauner

Preface

This document imports the wear log and shows descriptive statistics for the site.

Setup

library(tidyverse)
library(LightLogR)
Warning: package 'LightLogR' was built under R version 4.5.2
library(glue)
library(readxl)
library(gt)
library(gtsummary)

site <- "MPI"

remote <- 
  "https://raw.githubusercontent.com/MeLiDosProject/Data_Metadata_Conventions/main/scripts/"

c("labeling",
  "radio_factors",
  "time_summaries",
  "prepare_codebook",
  "filefinder",
  "general_parameters",
  "coltype_checker",
  "diarydate",
  "wearlog_plausibility",
  "tables"
) |> walk(\(x) source(paste0(remote, x, ".R")))

Preparation

#collect codebook
codebook <- prepare_codebook("MeLiDosWearLog_DataDictionary_2024-10-16.csv")
#collect files
files <- filefinder("wearlog", continuous = TRUE, individual = TRUE)
#import files
data <- 
  read_csv2(files, show_col_types = FALSE) |> 
    mutate(record_id = paste0("MPI_S", record_id),
    across(c(wearlog_off, wearlog_on, wearlog_bed, wearlog_past_off,
             wearlog_past_on, wearlog_past_sleep, radius_past_time,
             radius_past_back), 
           \(x) parse_date_time(x, "dmyHM"))
    )
ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
#check column types
coltype_check <- coltype_checker(codebook, data)
coltype_check$details |> gt()
col expected present actual type_ok issue expected_example
wearlog_off POSIXct TRUE POSIXct TRUE ok as.POSIXct(..., tz = 'UTC')
wearlog_on POSIXct TRUE POSIXct TRUE ok as.POSIXct(..., tz = 'UTC')
wearlog_bed POSIXct TRUE POSIXct TRUE ok as.POSIXct(..., tz = 'UTC')
wearlog_past_off POSIXct TRUE POSIXct TRUE ok as.POSIXct(..., tz = 'UTC')
wearlog_past_on POSIXct TRUE POSIXct TRUE ok as.POSIXct(..., tz = 'UTC')
wearlog_past_sleep POSIXct TRUE POSIXct TRUE ok as.POSIXct(..., tz = 'UTC')
radius_past_time POSIXct TRUE POSIXct TRUE ok as.POSIXct(..., tz = 'UTC')
radius_past_back POSIXct TRUE POSIXct TRUE ok as.POSIXct(..., tz = 'UTC')
wearlog_type numeric TRUE numeric TRUE ok as.numeric(...)
wearlog_event numeric TRUE numeric TRUE ok as.numeric(...)
reason numeric FALSE NA FALSE missing as.numeric(...)
wearlog_past numeric TRUE numeric TRUE ok as.numeric(...)
status numeric FALSE NA FALSE missing as.numeric(...)
wearlog_bag logical TRUE numeric FALSE wrong_type as.logical(...)
wearlog_motion logical TRUE numeric FALSE wrong_type as.logical(...)
wearlog_bp logical TRUE numeric FALSE wrong_type as.logical(...)
wearlog_past_bag logical TRUE numeric FALSE wrong_type as.logical(...)
wearlog_past_motion logical TRUE numeric FALSE wrong_type as.logical(...)
scheduledate Date FALSE NA FALSE missing as.Date(...)
record_id character TRUE character TRUE ok as.character(...)
radius character TRUE character TRUE ok as.character(...)
wearlog_place character TRUE character TRUE ok as.character(...)
radius_past character TRUE logical FALSE wrong_type as.character(...)
wearlog_past_location character TRUE character TRUE ok as.character(...)
uuid character FALSE NA FALSE missing as.character(...)
supplementaldata character FALSE NA FALSE missing as.character(...)
serializedresult character FALSE NA FALSE missing as.character(...)
#collect relevant columns: POSIXct, Date & numeric
relevant_columns <- 
  coltype_check$details |> 
  pull(col)
#add specific character columns
relevant_columns <- c("record_id", relevant_columns)
#select relevant columns
data <- data |> select(any_of(relevant_columns))
#label variables
data <-
data |> 
  add_radio_factors(codebook, 
                    var_col = `Variable / Field Name`, 
                    type_col = `Field Type`,
                    levels_col = `Choices, Calculations, OR Slider Labels`
                    ) |> 
  add_col_labels(codebook, var_col = `Variable / Field Name`, label_col = `Field Label`) |> 
  relocate(record_id, any_of(codebook$`Variable / Field Name`))
Warning in add_radio_factors(data, codebook, var_col = `Variable / Field Name`,
: Radio variables provided but not in `data`: reason
Warning in add_col_labels(add_radio_factors(data, codebook, var_col = `Variable
/ Field Name`, : Labels provided for variables not in `data`: reason, uuid,
startdate, enddate, scheduledate, status, supplementaldata, serializedresult
#rename so that past is at the end
data <-
  data |> 
  rename_with(\(x) ifelse(str_detect(x, "_past"), 
                          str_remove(x, "_past") |> str_c("_past"),
                          x)) |> 
  rename(wearlog_bed_past = wearlog_sleep_past,
         wearlog_event_past = wearlog_past,
         wearlog_place_past = wearlog_location_past)

#bring past and present events together
past_columns <- 
  data |> select(contains("_past")) |>  names() |> str_remove("_past")

data <-
data |> 
  pivot_longer(any_of(c(past_columns, paste0(past_columns, "_past"))),
               names_to = c(".value", "is_past"),
               names_pattern = "^(.*?)(?:(_past))?$",
               values_drop_na = FALSE
               ) |> 
  mutate(is_past = is_past != "", reason = NA) |> 
  pivot_longer(c(wearlog_on, wearlog_off, wearlog_bed,
                 radius_time, radius_back),
               names_to = "event",
               values_to = "start"
               ) |>
  drop_na(start) |>
  relocate(.after = record_id,
           start, event, wearlog_event, is_past, wearlog_type, radius) |>
  mutate(event = case_match(event,
                            "radius_time" ~ "site_leave",
                            "radius_back" ~ "site_return",
                            "wearlog_on" ~ "on",
                            "wearlog_off" ~ "off",
                            "wearlog_bed" ~ "sleep")) |>
  mutate(across(where(is.POSIXct), \(x) force_tz(x, tzs[[site]])))

Plausibility checking

Here, we perform some basic plausibility checking for the wearlog:

  1. Sorting by record_id and Datetime

  2. Set to plausible if:

  • site leave and return are consecutive events?
  • wearlog off event is followed by on within 16 hours?
  • wearlog off event is followed by sleep within 4 hours?
  • wearlog sleep event is followed by on within 1 day?
  • wearlog sleep event is followed by off within 1 day?
  • wearlog on event is preceded by either off or sleep within 12 hours?
data <- data |> wearlog_plausibility()

Durations for different conditions

Site

data_site <- 
data |> 
  filter(event %in% c("site_leave", "site_return")) |> 
  mutate(.by = record_id,
         duration = difftime(lead(start), start, unit = "mins"),
         end = start + duration,
         .after = event) |> 
  filter(event == "site_leave", duration != 0, is.plausible)

Wear

data_wear <-
data |> 
  filter(event %in% c("on", "off", "sleep")) |> 
  mutate(.by = record_id,
         duration = difftime(lead(start), start, unit = "mins"),
         end = start + duration,
         .after = event) |> 
  filter(event %in% c("off", "sleep"), duration != 0, is.plausible)

Join

data <-
  rbind(data_wear, data_site) |> 
  arrange(record_id, start) |> 
  select(-c(is.plausible, is_past)) |> 
  rename(state = event)

Translate comments into english

Native language is translated into English with AI and later checked by a site researcher.

# library(ellmer)
# 
# #Providing the relevant codebook portions
# codebook_red <-
# codebook|>
#   pmap(~ paste(paste(names(codebook), c(...), sep = ": "), collapse = ", ")) |>
#   list_c() |>
#   paste0(collapse = "newline: ")
# 
# chat <- chat_openai(paste0("Clean the dataset according to the instructions in the output structure. Here is the codebook: "))
# 
# #Providing the input
# data_red <-
# data|>
#   select(record_id, wearlog_place)
# data_red <-
#   data_red |>
#   pmap(~ paste(paste(names(data_red), c(...), sep = ": "), collapse = ", "))
# 
# #creating an output structure
# type_data <- type_object(
#   record_id = type_string("use the record_id information"),
#   wearlog_place = type_string("copy the original information here", required = FALSE),
#   wearlog_place_english = type_string("translation of the original information into english", required = FALSE)
# )
# 
# data_llm <-
# parallel_chat_structured(
#   chat,
#   data_red,
#   type = type_data
# )
# 
# #Ensure that no NA is caught as string
# data_llm <-
#   data_llm |>
#   mutate(wearlog_place = case_when(wearlog_place == "NA" ~ NA, .default = wearlog_place),
#          wearlog_place_english = case_when(wearlog_place_english == "NA" ~ NA, .default = wearlog_place_english))
# 
# #check that input and output are identical
# stopifnot("Input must by identical to output check" =
#             all(data_llm$wearlog_place == data$wearlog_place, na.rm = TRUE))
# stopifnot("Input must by identical to output check" =
#             all(data_llm$record_id == data$record_id, na.rm = TRUE))
# 
# data_llm <-
#   data_llm |>
#   distinct(record_id, wearlog_place, .keep_all = TRUE)
# 
# path <- "../data/AI_translations/"
# if(!dir.exists(path)) dir.create(path, recursive = TRUE)
# write_csv(data_llm, "../data/AI_translations/wearlog.csv")
data_llm <- 
  read_csv("../data/AI_translations/wearlog.csv")
Rows: 58 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): record_id, wearlog_place, wearlog_place_english

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#add output to original
data <- data |> left_join(data_llm, by = c("record_id", "wearlog_place"))

Labels

data <-
data |> 
  add_col_labels(codebook, var_col = `Variable / Field Name`, label_col = `Field Label`)
Warning in add_col_labels(data, codebook, var_col = `Variable / Field Name`, :
Labels provided for variables not in `data`: wearlog_off, wearlog_on,
wearlog_bed, wearlog_past, wearlog_past_off, wearlog_past_on,
wearlog_past_sleep, radius_past_time, radius_past, radius_past_back,
wearlog_past_bag, wearlog_past_location, wearlog_past_motion, uuid, startdate,
enddate, scheduledate, status, supplementaldata, serializedresult
attr(data$start, "label") <- "Start datetime of the state"
attr(data$state, "label") <- "Wearlog state"
attr(data$duration, "label") <- "Duration of the state"
attr(data$end, "label") <- "End datetime of the state"
attr(data$wearlog_event, "label") <- "Description of the starting event"
attr(data$wearlog_type, "label") <- "Logging a current or past event"
attr(data$wearlog_place_english, "label") <- "Placement of the light glasses"

Summarize results

table_wearlog(data)
Wearlog N N = 3721
Wearlog state 372
    off
195 (52%)
    sleep
177 (48%)
Duration of the state 372 174 mins (35 mins, 500 mins)
Logging a current or past event 372
    Current event
348 (94%)
    Past event that I forgot to log before
24 (6.5%)
Did you press the button on the light glasses? 24 17 (71%)
    missing
348
Are the light glasses in the black bag we provided you with? 195 140 (72%)
    missing
177
Are the light glasses in motion (e.g.: in your backpack as you bike, in your pocket, on the bus)? 195 26 (13%)
    missing
177
What is prompting you to remove the light glasses? 0 0 (NA%)
    missing
372
1 n (%); Median (Q1, Q3)
gtsave(table_wearlog(data) |> as_gt(), filename = "../output/tables/table_wearlog.png", vwidth = 800)
file:////var/folders/9p/326_k3kx43qbn_cyl1rqfhb00000gn/T//RtmpfCtHWT/file3a875dbd169e.html screenshot completed

Export

data <- data |> rename(Id = record_id)
wearlog <- data
path <- "../data/imported/continuous/"
if(!dir.exists(path)) dir.create(path, recursive = TRUE)
save(wearlog, file = "../data/imported/continuous/wearlog.RData")