Import exercise diaries

Author

Johannes Zauner

Preface

This document imports the exercise diary data and shows descriptive statistics for the site.

Setup

library(tidyverse)
Warning: package 'ggplot2' was built under R version 4.5.2
Warning: package 'tibble' was built under R version 4.5.2
Warning: package 'tidyr' was built under R version 4.5.2
Warning: package 'purrr' was built under R version 4.5.2
Warning: package 'dplyr' was built under R version 4.5.2
library(LightLogR)
library(glue)
Warning: package 'glue' was built under R version 4.5.2
library(readxl)
library(gt)
library(gtsummary)

# Base URL of the shared helper scripts in the project conventions repository
remote <- "https://raw.githubusercontent.com/MeLiDosProject/Data_Metadata_Conventions/main/scripts/"

# Build the full URL of every helper script and source each one in turn
paste0(
  remote,
  c(
    "labeling",
    "radio_factors",
    "time_summaries",
    "prepare_codebook",
    "filefinder",
    "general_parameters",
    "coltype_checker",
    "diarydate",
    "tables"
  ),
  ".R"
) |>
  walk(source)
Warning: package 'rlang' was built under R version 4.5.2

Preparation

# Load the codebook, filtered to the exercise_diary form
codebook <- prepare_codebook(
  "MeLiDosEveningDiaries_DataDictionary_2024-10-16.csv",
  form.filter = "exercise_diary"
)

# Locate the exercise diary files
files <- filefinder("exercisediary", continuous = TRUE, individual = TRUE)

# Read the files, strip a trailing "_v2" from column names, and drop rows
# that have no repeat instance
data <- read_csv(files, show_col_types = FALSE)
data <- rename_with(data, \(nm) str_remove(nm, "_v2$"))
data <- drop_na(data, redcap_repeat_instance)

# Compare the imported column types against the codebook and show the details
coltype_check <- coltype_checker(codebook, data)
gt(coltype_check$details)
col expected present actual type_ok issue expected_example
instructions numeric TRUE numeric TRUE ok as.numeric(...)
intensity numeric TRUE numeric TRUE ok as.numeric(...)
location numeric TRUE numeric TRUE ok as.numeric(...)
commute numeric TRUE numeric TRUE ok as.numeric(...)
sedentary numeric TRUE numeric TRUE ok as.numeric(...)
status_3 numeric TRUE numeric TRUE ok as.numeric(...)
light_glasses logical TRUE numeric FALSE wrong_type as.logical(...)
startdate_3 Date TRUE POSIXct FALSE wrong_type as.Date(...)
enddate_3 Date TRUE POSIXct FALSE wrong_type as.Date(...)
scheduledate_3 Date TRUE logical FALSE wrong_type as.Date(...)
type character TRUE character TRUE ok as.character(...)
uuid_3 character TRUE character TRUE ok as.character(...)
supplementaldata_3 character TRUE character TRUE ok as.character(...)
serializedresult_3 character TRUE character TRUE ok as.character(...)
# Keep every column reported by the column-type check (regardless of type),
# plus the participant identifier
relevant_columns <- c("record_id", pull(coltype_check$details, col))

# any_of() tolerates columns that are absent from the imported files
data <- data |> select(any_of(relevant_columns))

# Apply the codebook to the data via the project helpers (factor levels for
# radio fields, variable labels), move the identifier and codebook variables
# to the front, and drop technical metadata columns
data <-
  data |>
  add_radio_factors(
    codebook,
    var_col = `Variable / Field Name`,
    type_col = `Field Type`,
    levels_col = `Choices, Calculations, OR Slider Labels`
  ) |>
  add_col_labels(
    codebook,
    var_col = `Variable / Field Name`,
    label_col = `Field Label`
  ) |>
  relocate(record_id, any_of(codebook$`Variable / Field Name`)) |>
  # any_of() keeps this step consistent with the selection above: a missing
  # metadata column must not abort the import, since it is discarded anyway
  select(
    -any_of(c(
      "uuid_3",
      "serializedresult_3",
      "supplementaldata_3",
      "status_3",
      "scheduledate_3"
    ))
  )

# record_id gets its label set by hand
attr(data$record_id, "label") <- "Record ID"

Translate comments into English

Entries written in the native language are translated into English with AI and later checked by a site researcher.

# The commented-out code below is the workflow that produced the translation
# file: it sends record_id and type to a chat model, requests a structured
# answer with an added type_english field, checks the answer against the
# input, and writes the result to ../data/AI_translations/exercisediary.csv.
# It is kept for reference only; the live code at the end of this chunk just
# reads the stored file.
# library(ellmer)
# 
# #Providing the relevant codebook portions
# codebook_red <-
# codebook|>
#   pmap(~ paste(paste(names(codebook), c(...), sep = ": "), collapse = ", ")) |>
#   list_c() |>
#   paste0(collapse = "newline: ")
# 
# NOTE(review): codebook_red is built above but is not appended to the prompt
# string below — confirm whether the codebook was meant to be included.
# chat <- chat_openai(paste0("Clean the dataset according to the instructions in the output structure. Here is the codebook: "))
# 
# #Providing the input
# data_red <-
# data|>
#   select(record_id, type)
# data_red <-
#   data_red |>
#   pmap(~ paste(paste(names(data_red), c(...), sep = ": "), collapse = ", "))
# 
# #creating an output structure
# type_data <- type_object(
#   record_id = type_string("use the record_id information"),
#   type = type_string("copy the type information here", required = FALSE),
#   type_english = type_string("translation of the type into english", required = FALSE)
# )
# 
# data_llm <-
# parallel_chat_structured(
#   chat,
#   data_red,
#   type = type_data
# )
# 
# #Ensure that no NA is caught as string
# data_llm <-
#   data_llm |>
#   mutate(type = case_when(type == "NA" ~ NA, .default = type),
#          type_english = case_when(type_english == "NA" ~ NA, .default = type_english))
# 
# #check that input and output are identical
# stopifnot("Input must by identical to output check" =
#             all(data_llm$type == data$type, na.rm = TRUE))
# stopifnot("Input must by identical to output check" =
#             all(data_llm$record_id == data$record_id, na.rm = TRUE))
# 
# #keep one row per record_id/type combination before writing
# data_llm <-
#   data_llm |>
#   distinct(record_id, type, .keep_all = TRUE)
# 
# path <- "../data/AI_translations/"
# if(!dir.exists(path)) dir.create(path, recursive = TRUE)
# write_csv(data_llm, "../data/AI_translations/exercisediary.csv")
# Read the stored translations (columns: record_id, type, type_english)
data_llm <- 
  read_csv("../data/AI_translations/exercisediary.csv")
Rows: 80 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): record_id, type, type_english

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Attach the English translations to the diary data, matching on record_id
# and the original type text. The translation table is expected to hold at
# most one row per (record_id, type) pair, so the join is declared
# many-to-one: dplyr then raises an error instead of silently duplicating
# diary entries if the translation file ever contains repeated keys.
data <- data |>
  left_join(
    data_llm,
    by = join_by(record_id, type),
    relationship = "many-to-one"
  )

Set relevant dates

# Assign each entry to a diary day based on its start time:
#   - collected between 14:00 and 24:00 -> same calendar day
#   - collected between 00:00 and 13:59 -> previous calendar day
data <- diarydate(data, startdate_3)

# Label the Date column
attr(data$Date, "label") <- "Date"

Convert minute columns to durations

# Store the commute and sedentary times (answered in minutes) as difftime
# values. The argument is spelled out as `units`; `unit` only worked through
# partial argument matching.
data <-
  data |>
  mutate(across(c(commute, sedentary), \(x) as.difftime(x, units = "mins")))

# Re-apply the codebook labels after the conversion
# (presumably the label attributes are lost when the columns are rebuilt —
# TODO confirm)
data <-
  data |>
  add_col_labels(
    codebook,
    var_col = `Variable / Field Name`,
    label_col = `Field Label`
  )
Warning in add_col_labels(data, codebook, var_col = `Variable / Field Name`, :
Labels provided for variables not in `data`: uuid_3, scheduledate_3, status_3,
supplementaldata_3, serializedresult_3

Summarize results

# Print the descriptive summary table of the exercise diary
data |> table_exercisediary()
Evening exercise diary N N = 119¹
My day involved the following type of physical activity 119
    Vigorous (causing large increases in heart rate and breathing, e.g. running)
9 (7.6%)
    Moderate (causing moderate increases in heart rate and breathing, e.g. cycling in the city)
19 (16%)
    Light (causing small to no increases in heart rate and breathing, e.g. taking a stroll in the park)
41 (34%)
    None of the above, I did not perform any type of physical activity
50 (42%)
I performed physical activity 73
    Outdoors (e.g. running, cycling in the city)
38 (52%)
    Indoors (e.g. gym or home workout)
21 (29%)
    Both indoors and outdoors
14 (19%)
    missing
46
How much time did you spend walking and/or cycling to get to and from places? Please answer in minutes 119 45 mins (20 mins, 90 mins)
How much time did you spend sitting or reclining? Please answer in minutes 119 360 mins (240 mins, 480 mins)
Did you wear the light glasses while doing exercises? 69 41 (59%)
    missing
50
¹ n (%); Median (Q1, Q3)
# Save the summary table as a PNG image. The output directory is created
# first if it does not exist yet, so the export does not fail on a fresh
# checkout.
table_dir <- "../output/tables/"
if (!dir.exists(table_dir)) dir.create(table_dir, recursive = TRUE)
table_exercisediary(data) |>
  as_gt() |>
  gtsave(filename = "../output/tables/table_exercisediary.png", vwidth = 800)
file:////var/folders/9p/326_k3kx43qbn_cyl1rqfhb00000gn/T//RtmpBQu6Y5/file37411dd8d488.html screenshot completed

Export

# Rename the identifier column to Id and keep the result under both names
exercisediary <- rename(data, Id = record_id)
data <- exercisediary

# Make sure the export directory exists, then store the imported diary
path <- "../data/imported/continuous/"
if (!dir.exists(path)) {
  dir.create(path, recursive = TRUE)
}
save(exercisediary, file = "../data/imported/continuous/exercisediary.RData")