Visdat Overview of the Data
# One-glance overview of the whole data frame via visdat.
shhs_data %>%
  visdat::vis_dat()

Skimr Overview of the Data
# Per-variable summary statistics, grouped by variable type (see output below).
skimr::skim(shhs_data)
## Skim summary statistics
## n obs: 5802
## n variables: 17
##
## ── Variable type:factor ────────────────────────────────────────────────────────────────────────────────
## variable missing complete n n_unique
## ace1 18 5784 5802 2
## any_cvd 760 5042 5802 2
## beta1 18 5784 5802 2
## gender 0 5802 5802 2
## htnderv_s1 0 5802 5802 2
## race 0 5802 5802 3
## smokstat_s1 41 5761 5802 3
## srhype 249 5553 5802 2
## top_counts ordered
## No: 5066, Yes: 718, NA: 18 FALSE
## No: 3846, Yes: 1196, NA: 760 FALSE
## No: 5056, Yes: 728, NA: 18 FALSE
## Fem: 3037, Mal: 2765, NA: 0 FALSE
## No: 3325, Yes: 2477, NA: 0 FALSE
## Whi: 4907, Bla: 515, Oth: 380, NA: 0 FALSE
## Nev: 2706, For: 2495, Cur: 560, NA: 41 FALSE
## No: 3484, Yes: 2069, NA: 249 FALSE
##
## ── Variable type:integer ───────────────────────────────────────────────────────────────────────────────
## variable missing complete n mean sd p0 p25 p50 p75 p100 hist
## age_s1 0 5802 5802 63.14 11.22 39 55 63 72 90 ▃▃▇▇▇▇▃▁
## chol 275 5527 5802 207.56 38.67 28 182 205 230 458 ▁▁▅▇▂▁▁▁
## systbp 111 5691 5802 127.38 19.33 52 113 125 139 214 ▁▁▅▇▅▂▁▁
##
## ── Variable type:numeric ───────────────────────────────────────────────────────────────────────────────
## variable missing complete n mean sd p0 p25 p50 p75 p100
## ahi_a0h3 0 5802 5802 14.65 15.61 0 3.97 9.5 19.6 161.84
## ahi_a0h4 0 5802 5802 10.18 13.61 0 1.75 5.2 13.14 154.29
## bmi_s1 41 5761 5802 28.16 5.09 18 24.67 27.5 30.76 50
## hdl 500 5302 5802 50.64 15.72 5.5 39 48 59 149
## neck20 29 5773 5802 37.82 4.21 22 34.5 37.9 40.9 59
## oahi 698 5104 5802 9.63 12.74 0 1.67 5.03 12.61 115.81
## hist
## ▇▂▁▁▁▁▁▁
## ▇▁▁▁▁▁▁▁
## ▂▇▇▅▂▁▁▁
## ▁▆▇▃▁▁▁▁
## ▁▁▆▇▅▁▁▁
## ▇▂▁▁▁▁▁▁
Categorical: Barplot of a single variable
# Bar plot of any_cvd; the same variable drives both the x position and the
# fill colour.
shhs_data %>%
  ggplot(aes(x = any_cvd, fill = any_cvd)) +
  geom_bar()

Categorical: Proportional Plot of race
vs any_cvd
# One bar per race, filled by any_cvd; position = "fill" rescales every bar to
# the same height so the segments read as proportions.
shhs_data %>%
  ggplot(aes(x = race, fill = any_cvd)) +
  geom_bar(position = "fill")

Categorical: Missing Value Plot of any_cvd
versus all other variables
# Missing-value plot of every other variable, split by the any_cvd factor.
shhs_data %>%
  naniar::gg_miss_fct(fct = any_cvd)

Continuous: Histogram of age_s1
# Histogram of age_s1 using 40 bins.
shhs_data %>%
  ggplot(aes(x = age_s1)) +
  geom_histogram(bins = 40)

Continuous: Boxplot of age_s1
versus race
# Boxplots of age_s1, one box per race, each filled by race.
shhs_data %>%
  ggplot(aes(x = race, y = age_s1, fill = race)) +
  geom_boxplot()

Continuous: Scatterplot of bmi_s1
versus neck20
# Scatterplot of bmi_s1 against neck20 with a linear-model fit line;
# se = FALSE leaves out the confidence band around the line.
shhs_data %>%
  ggplot(aes(x = bmi_s1, y = neck20)) +
  geom_point() +
  stat_smooth(method = "lm", se = FALSE)
## Warning: Removed 68 rows containing non-finite values (stat_smooth).
## Warning: Removed 68 rows containing missing values (geom_point).

Continuous: Scatterplot of bmi_s1
versus neck20
with missing data
# Same scatterplot as before, but drawn with naniar::geom_miss_point() in place
# of geom_point() so that rows with missing bmi_s1 or neck20 are shown too.
# The linear-model fit line is again drawn without a confidence band.
shhs_data %>%
  ggplot(aes(x = bmi_s1, y = neck20)) +
  naniar::geom_miss_point() +
  stat_smooth(method = "lm", se = FALSE)
## Warning: Removed 68 rows containing non-finite values (stat_smooth).
