Import exercise diaries

Author

Johannes Zauner

Preface

This document imports the exercise diary and shows descriptive statistics for the site.

Setup

library(tidyverse)
Warning: package 'tidyr' was built under R version 4.5.2
Warning: package 'dplyr' was built under R version 4.5.2
library(LightLogR)
Warning: package 'LightLogR' was built under R version 4.5.2
library(glue)
library(readxl)
library(gt)
library(gtsummary)

# Base URL of the shared MeLiDos helper scripts.
# NOTE(review): the scripts are taken from the `main` branch, so the helpers
# used here depend on the state of the remote repository at render time.
remote <- 
  "https://raw.githubusercontent.com/MeLiDosProject/Data_Metadata_Conventions/main/scripts/"

# Build the full URL of every helper script and source them one after another
paste0(
  remote,
  c(
    "labeling",
    "radio_factors",
    "time_summaries",
    "prepare_codebook",
    "filefinder",
    "general_parameters",
    "coltype_checker",
    "diarydate",
    "tables"
  ),
  ".R"
) |>
  walk(source)
Warning: package 'rlang' was built under R version 4.5.2

Preparation

# Load the data dictionary, restricted to the exercise diary form
codebook <- prepare_codebook(
  "MeLiDosEveningDiaries_DataDictionary_2024-10-16.csv",
  form.filter = "exercise_diary"
)
# Locate the exercise diary files
files <- filefinder("exercisediary", continuous = TRUE, individual = TRUE)
# First import batch (files 1-3 and 6-26): timestamps are parsed in
# day-month-year hour:minute order
data1 <-
  files[c(1:3, 6:26)] |>
  read_csv2(show_col_types = FALSE) |>
  # keep only rows that carry a repeat instance
  drop_na(redcap_repeat_instance) |>
  mutate(
    record_id = paste0("MPI_S", record_id),
    commute = parse_number(commute),
    # sedentary values above 1000 are divided by ten
    sedentary = case_when(
      sedentary > 1000 ~ sedentary / 10,
      .default = sedentary
    ),
    across(
      c(startdate_3, enddate_3),
      \(x) parse_date_time(x, orders = "dmyHM")
    ),
    light_glasses = NA
  )
ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
# Second import batch (files 4 and 5): these are parsed with a year-first
# timestamp order that includes seconds, unlike the first batch.
data2 <-
  read_csv2(files[4:5], show_col_types = FALSE) |> 
  # keep only rows that carry a repeat instance
  drop_na(redcap_repeat_instance) |> 
  mutate(across(c(startdate_3, enddate_3), \(x) parse_date_time(x, c("ymdHMS"))),
         # Coerce commute to numeric regardless of the type guessed on import.
         # If it stays character here, rbind() with the first batch turns the
         # whole column into character and the later as.difftime() conversion
         # yields only NA (commute shows up as entirely missing in the table).
         commute = parse_number(as.character(commute)),
         record_id = paste0("MPI_S", record_id),
         # sedentary values above 1000 are divided by ten
         sedentary = case_when(sedentary > 1000 ~ sedentary/10,
                               .default = sedentary),
         light_glasses = NA)
ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
# Stack both import batches and order by participant and diary instance
data <- arrange(rbind(data1, data2), record_id, redcap_repeat_instance)
# Compare the imported column types against the codebook and show the details
coltype_check <- coltype_checker(codebook, data)
gt(coltype_check$details)
col expected present actual type_ok issue expected_example
instructions numeric TRUE numeric TRUE ok as.numeric(...)
intensity numeric TRUE numeric TRUE ok as.numeric(...)
location numeric TRUE numeric TRUE ok as.numeric(...)
commute numeric TRUE character FALSE wrong_type as.numeric(...)
sedentary numeric TRUE numeric TRUE ok as.numeric(...)
status_3 numeric FALSE NA FALSE missing as.numeric(...)
light_glasses logical TRUE logical TRUE ok as.logical(...)
startdate_3 Date TRUE POSIXct FALSE wrong_type as.Date(...)
enddate_3 Date TRUE POSIXct FALSE wrong_type as.Date(...)
scheduledate_3 Date FALSE NA FALSE missing as.Date(...)
type character TRUE character TRUE ok as.character(...)
uuid_3 character FALSE NA FALSE missing as.character(...)
supplementaldata_3 character FALSE NA FALSE missing as.character(...)
serializedresult_3 character FALSE NA FALSE missing as.character(...)
# Columns to keep: the participant identifier plus every column listed in the
# type check details
relevant_columns <- c("record_id", pull(coltype_check$details, col))
# Keep only those columns that actually exist in the data
data <- select(data, any_of(relevant_columns))
# Turn radio fields into factors, attach the codebook labels, and order the
# columns as in the codebook (record_id first)
data <-
  data |>
  add_radio_factors(
    codebook,
    var_col = `Variable / Field Name`,
    type_col = `Field Type`,
    levels_col = `Choices, Calculations, OR Slider Labels`
  ) |>
  add_col_labels(
    codebook,
    var_col = `Variable / Field Name`,
    label_col = `Field Label`
  ) |>
  relocate(record_id, any_of(codebook$`Variable / Field Name`))
Warning in add_col_labels(add_radio_factors(data, codebook, var_col = `Variable
/ Field Name`, : Labels provided for variables not in `data`: uuid_3,
scheduledate_3, status_3, supplementaldata_3, serializedresult_3
# record_id was added to the selection separately from the codebook columns,
# so its label is set by hand
attr(data$record_id, "label") <- "Record ID"

Translate comments into English

Free-text responses in the participants' native language are translated into English with AI and later checked by a site researcher.

# The commented-out code below produces the translation file that is read at
# the end of this chunk. It is kept for reference: it sends record_id and the
# free-text `type` column to a chat model, checks the returned values against
# the input, and writes the result to ../data/AI_translations/exercisediary.csv.
# library(ellmer)
# 
# #Providing the relevant codebook portions
# codebook_red <-
# codebook|>
#   pmap(~ paste(paste(names(codebook), c(...), sep = ": "), collapse = ", ")) |>
#   list_c() |>
#   paste0(collapse = "newline: ")
# 
# NOTE(review): codebook_red is built above but is not appended to the prompt
# in the next line, so the prompt ends after "Here is the codebook: ".
# chat <- chat_openai(paste0("Clean the dataset according to the instructions in the output structure. Here is the codebook: "))
# 
# #Providing the input
# data_red <-
# data|>
#   select(record_id, type)
# data_red <-
#   data_red |>
#   pmap(~ paste(paste(names(data_red), c(...), sep = ": "), collapse = ", "))
# 
# #creating an output structure
# type_data <- type_object(
#   record_id = type_string("use the record_id information"),
#   type = type_string("copy the type information here", required = FALSE),
#   type_english = type_string("translation of the type into english", required = FALSE)
# )
# 
# data_llm <-
# parallel_chat_structured(
#   chat,
#   data_red,
#   type = type_data
# )
# 
# #Ensure that no NA is caught as string
# data_llm <-
#   data_llm |>
#   mutate(type = case_when(type == "NA" ~ NA, .default = type),
#          type_english = case_when(type_english == "NA" ~ NA, .default = type_english))
# 
# #check that input and output are identical
# stopifnot("Input must by identical to output check" =
#             all(data_llm$type == data$type, na.rm = TRUE))
# stopifnot("Input must by identical to output check" =
#             all(data_llm$record_id == data$record_id, na.rm = TRUE))
# 
# data_llm <-
#   data_llm |>
#   distinct(record_id, type, .keep_all = TRUE)
# 
# path <- "../data/AI_translations/"
# if(!dir.exists(path)) dir.create(path, recursive = TRUE)
# write_csv(data_llm, "../data/AI_translations/exercisediary.csv")
# Read the stored translations (record_id, type, type_english)
data_llm <- 
  read_csv("../data/AI_translations/exercisediary.csv")
Rows: 118 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): record_id, type, type_english

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Attach the English translation to every diary entry, matched on participant
# and original free-text entry
data <- left_join(data, data_llm, by = c("record_id", "type"))

Set relevant dates

# Day assignment rule (as stated for the diarydate() helper):
# - entries collected between 14:00 and 24:00 belong to the same day
# - entries collected between 00:00 and 13:59 belong to the previous day

data <- diarydate(data, startdate_3)
# Label the new Date column
attr(data$Date, "label") <- "Date"

Convert minute columns to durations

# Express the minute columns as durations. The `units` argument of
# as.difftime() is spelled out in full instead of relying on partial
# argument matching.
data <-
  data |> 
  mutate(across(c(commute, sedentary), \(x) as.difftime(x, units = "mins")))

# Attach the codebook labels again after the conversion
data <-
data |> 
  add_col_labels(codebook, var_col = `Variable / Field Name`, label_col = `Field Label`)
Warning in add_col_labels(data, codebook, var_col = `Variable / Field Name`, :
Labels provided for variables not in `data`: uuid_3, scheduledate_3, status_3,
supplementaldata_3, serializedresult_3

Summarize results

# Build the descriptive summary table of the diary.
# table_exercisediary() is not defined in this file; presumably it comes from
# the sourced `tables` helper script.
table_exercisediary(data)
Evening exercise diary N N = 1801
My day involved the following type of physical activity 180
    Vigorous (causing large increases in heart rate and breathing, e.g. running)
28 (16%)
    Moderate (causing moderate increases in heart rate and breathing, e.g. cycling in the city)
39 (22%)
    Light (causing small to no increases in heart rate and breathing, e.g. taking a stroll in the park)
69 (38%)
    None of the above, I did not perform any type of physical activity
44 (24%)
I performed physical activity 136
    Outdoors (e.g. running, cycling in the city)
86 (63%)
    Indoors (e.g. gym or home workout)
41 (30%)
    Both indoors and outdoors
9 (6.6%)
    missing
44
How much time did you spend walking and/or cycling to get to and from places? Please answer in minutes 0 NA (NA, NA)
    missing
180
How much time did you spend sitting or reclining? Please answer in minutes 180 480 mins (300 mins, 600 mins)
Did you wear the light glasses while doing exercises? 0 0 (NA%)
    missing
180
1 n (%); Median (Q1, Q3)
# Save the summary table as a PNG. The target folder is created first when it
# is missing, mirroring the check done before the data export, so that saving
# also works on a fresh checkout.
table_path <- "../output/tables/"
if(!dir.exists(table_path)) dir.create(table_path, recursive = TRUE)
gtsave(table_exercisediary(data) |> as_gt(), filename = "../output/tables/table_exercisediary.png", vwidth = 800)
file:////var/folders/9p/326_k3kx43qbn_cyl1rqfhb00000gn/T//RtmpJwShOq/file43661e209dae.html screenshot completed

Export

# Rename the identifier column and keep a copy under the export name
data <- rename(data, Id = record_id)
exercisediary <- data
# Make sure the export folder exists, then store the dataset there
path <- "../data/imported/continuous/"
if (!dir.exists(path)) {
  dir.create(path, recursive = TRUE)
}
save(exercisediary, file = paste0(path, "exercisediary.RData"))