Import demographics

Author

Johannes Zauner

Preface

This document imports the demographics and shows descriptive statistics for the site.

Setup

library(tidyverse)
Warning: package 'tidyr' was built under R version 4.5.2
Warning: package 'dplyr' was built under R version 4.5.2
library(LightLogR)
Warning: package 'LightLogR' was built under R version 4.5.2
library(glue)
library(readxl)
library(gt)
library(gtsummary)

# Base URL of the helper scripts in the MeLiDos conventions repository.
# NOTE(review): this points at the `main` branch, so the sourced code may
# change between runs — consider pinning a commit or tag for reproducibility.
remote <-
  "https://raw.githubusercontent.com/MeLiDosProject/Data_Metadata_Conventions/main/scripts/"

# Source every helper script by name. Presumably these scripts define the
# helpers called further down (prepare_codebook(), filefinder(), ...) — verify.
walk(
  c(
    "labeling",
    "radio_factors",
    "prepare_codebook",
    "filefinder",
    "general_parameters",
    "coltype_checker",
    "tables"
  ),
  \(script) source(paste0(remote, script, ".R"))
)
Warning: package 'rlang' was built under R version 4.5.2

Preparation

# Codebook: built from the screening data dictionary for the two listed
# instruments, with the e-mail fields removed afterwards
codebook <- prepare_codebook(
  "MeLiDosScreeningQuestionnaire_DataDictionary_2024-10-16.csv",
  c("online_screening_consent_form", "your_demographics")
)
codebook <- filter(
  codebook,
  !(`Variable / Field Name` %in% c("email", "email_2"))
)

# Files: located via the sourced filefinder() helper
# (presumably `negate = "health"` excludes health files — verify)
files <- filefinder("demographics", individual = FALSE, negate = "health")

# Import: read the TSV file(s), drop rows without a repeat instance,
# coerce native_language to logical and prefix the record id with "MPI_S"
data <- read_tsv(files, show_col_types = FALSE)
data <- drop_na(data, redcap_repeat_instance)
data <- mutate(
  data,
  native_language = as.logical(native_language),
  record_id = paste0("MPI_S", record_id)
)

# Column types: compare the imported data against the codebook and show
# the per-column details as a gt table
coltype_check <- coltype_checker(codebook, data)
gt(coltype_check$details)
col expected present actual type_ok issue expected_example
consent numeric FALSE NA FALSE missing as.numeric(...)
age numeric TRUE numeric TRUE ok as.numeric(...)
sex numeric TRUE numeric TRUE ok as.numeric(...)
gender numeric TRUE numeric TRUE ok as.numeric(...)
language_specification numeric TRUE numeric TRUE ok as.numeric(...)
employment_status numeric TRUE numeric TRUE ok as.numeric(...)
native_language logical TRUE logical TRUE ok as.logical(...)
record_id character TRUE character TRUE ok as.character(...)
explanation character FALSE NA FALSE missing as.character(...)
comments character TRUE character TRUE ok as.character(...)
# Names of all columns listed in the column-type check
relevant_columns <- coltype_check$details[["col"]]

# Keep only those columns; any_of() tolerates names that are absent from
# the data (the check above reports some as missing)
data <- select(data, any_of(relevant_columns))

# Turn radio variables into factors and attach the field labels, both
# driven by the codebook
data <-
  data |>
  add_radio_factors(
    codebook,
    var_col = `Variable / Field Name`,
    type_col = `Field Type`,
    levels_col = `Choices, Calculations, OR Slider Labels`
  ) |>
  add_col_labels(
    codebook,
    var_col = `Variable / Field Name`,
    label_col = `Field Label`
  )
Warning in add_radio_factors(data, codebook, var_col = `Variable / Field Name`,
: Radio variables provided but not in `data`: consent
Warning in add_col_labels(add_radio_factors(data, codebook, var_col = `Variable
/ Field Name`, : Labels provided for variables not in `data`: explanation,
consent
# Column order: native_language directly before language_specification,
# then record_id moved to the front
data <- dplyr::relocate(
  data,
  native_language,
  .before = language_specification
)
data <- dplyr::relocate(data, record_id)

Summarize results

# Display the descriptive demographics table using the table_demographics()
# helper (presumably defined in the sourced "tables" script — verify)
table_demographics(data)
Demographics N N = 26¹
Current age in years 26 27.0 (25.0, 29.0)
Sex assigned at birth 26
    Female
14 (54%)
    Male
12 (46%)
    Intersex
0 (0%)
    Prefer not to say
0 (0%)
Gender identity 26
    Woman
14 (54%)
    Man
12 (46%)
    Non-binary
0 (0%)
    Other
0 (0%)
    Prefer not to say
0 (0%)
Is the questionnaire language your native language? 26 5 (19%)
If your native language is not the questionnaire language, please specify your language proficiency for it 21
    Advanced
18 (86%)
    Intermediate
3 (14%)
    Basic
0 (0%)
    missing
5
Employment 26
    Full time employed
6 (23%)
    Part time employed
3 (12%)
    Marginally employed (Minijob)
0 (0%)
    Not employed but studying or in training
10 (38%)
    Studying and employed
7 (27%)
    Not employed
0 (0%)
¹ Median (Q1, Q3); n (%)
# Save the demographics table as a PNG. The output folder is created first
# (mirroring the guard used for the data export) so the save does not depend
# on the folder already existing.
table_dir <- "../output/tables"
if (!dir.exists(table_dir)) {
  dir.create(table_dir, recursive = TRUE)
}
table_demographics(data) |>
  as_gt() |>
  gtsave(
    filename = file.path(table_dir, "table_demographics.png"),
    vwidth = 800
  )
file:////var/folders/9p/326_k3kx43qbn_cyl1rqfhb00000gn/T//RtmpPH3O3L/file114b413312c47.html screenshot completed

Export

# Rename the identifier column to `Id` and keep the result under the name
# it is saved as
data <- data |> rename(Id = record_id)
demographics <- data

# Target folder for imported data; created (including parents) if missing
path <- "../data/imported/"
if (!dir.exists(path)) {
  dir.create(path, recursive = TRUE)
}

# Build the file name from `path` so the folder that is checked/created and
# the folder that is written to cannot drift apart
save(demographics, file = paste0(path, "demographics.RData"))