Visdat Overview of the Data

# Draw the visdat overview plot for the complete data frame.
visdat::vis_dat(shhs_data)

Skimr Overview of the Data

# Print skimr summary statistics for every column, grouped by variable type
# (missing/complete counts, level counts for factors, quantiles and inline
# histograms for integer and numeric columns).
skimr::skim(shhs_data)
## Skim summary statistics
##  n obs: 5802 
##  n variables: 17 
## 
## ── Variable type:factor ────────────────────────────────────────────────────────────────────────────────
##     variable missing complete    n n_unique
##         ace1      18     5784 5802        2
##      any_cvd     760     5042 5802        2
##        beta1      18     5784 5802        2
##       gender       0     5802 5802        2
##   htnderv_s1       0     5802 5802        2
##         race       0     5802 5802        3
##  smokstat_s1      41     5761 5802        3
##       srhype     249     5553 5802        2
##                              top_counts ordered
##              No: 5066, Yes: 718, NA: 18   FALSE
##            No: 3846, Yes: 1196, NA: 760   FALSE
##              No: 5056, Yes: 728, NA: 18   FALSE
##             Fem: 3037, Mal: 2765, NA: 0   FALSE
##              No: 3325, Yes: 2477, NA: 0   FALSE
##    Whi: 4907, Bla: 515, Oth: 380, NA: 0   FALSE
##  Nev: 2706, For: 2495, Cur: 560, NA: 41   FALSE
##            No: 3484, Yes: 2069, NA: 249   FALSE
## 
## ── Variable type:integer ───────────────────────────────────────────────────────────────────────────────
##  variable missing complete    n   mean    sd p0 p25 p50 p75 p100     hist
##    age_s1       0     5802 5802  63.14 11.22 39  55  63  72   90 ▃▃▇▇▇▇▃▁
##      chol     275     5527 5802 207.56 38.67 28 182 205 230  458 ▁▁▅▇▂▁▁▁
##    systbp     111     5691 5802 127.38 19.33 52 113 125 139  214 ▁▁▅▇▅▂▁▁
## 
## ── Variable type:numeric ───────────────────────────────────────────────────────────────────────────────
##  variable missing complete    n  mean    sd   p0   p25   p50   p75   p100
##  ahi_a0h3       0     5802 5802 14.65 15.61  0    3.97  9.5  19.6  161.84
##  ahi_a0h4       0     5802 5802 10.18 13.61  0    1.75  5.2  13.14 154.29
##    bmi_s1      41     5761 5802 28.16  5.09 18   24.67 27.5  30.76  50   
##       hdl     500     5302 5802 50.64 15.72  5.5 39    48    59    149   
##    neck20      29     5773 5802 37.82  4.21 22   34.5  37.9  40.9   59   
##      oahi     698     5104 5802  9.63 12.74  0    1.67  5.03 12.61 115.81
##      hist
##  ▇▂▁▁▁▁▁▁
##  ▇▁▁▁▁▁▁▁
##  ▂▇▇▅▂▁▁▁
##  ▁▆▇▃▁▁▁▁
##  ▁▁▆▇▅▁▁▁
##  ▇▂▁▁▁▁▁▁

Categorical: Barplot of a single variable

# Bar chart counting the rows in each any_cvd level; bars are coloured by
# the same variable.
ggplot(shhs_data, aes(x = any_cvd, fill = any_cvd)) +
  geom_bar()

Categorical: Proportional Plot of race vs any_cvd

# One bar per race, split by any_cvd; position = "fill" scales every bar to
# the same height so the segments show proportions rather than counts.
ggplot(shhs_data, aes(x = race, fill = any_cvd)) +
  geom_bar(position = "fill")

Categorical: Missing Value Plot of any_cvd versus all other variables

# Missing-value plot of every other variable, broken down by any_cvd.
naniar::gg_miss_fct(x = shhs_data, fct = any_cvd)

Continuous: Histogram of age_s1

# Histogram of age_s1 using 40 bins.
ggplot(shhs_data, aes(x = age_s1)) +
  geom_histogram(bins = 40)

Continuous: Boxplot of age_s1 versus race

# Box plot of age_s1 for each race group, with boxes coloured by race.
ggplot(shhs_data, aes(x = race, y = age_s1, fill = race)) +
  geom_boxplot()

Continuous: Scatterplot of bmi_s1 versus neck20

# Scatter plot of neck20 against bmi_s1 with a fitted linear-model line and
# no confidence band (se = FALSE). Rows with a missing value in either
# variable are dropped by both layers, which triggers the warnings below.
ggplot(shhs_data, aes(x = bmi_s1, y = neck20)) +
  geom_point() +
  stat_smooth(method = "lm", se = FALSE)
## Warning: Removed 68 rows containing non-finite values (stat_smooth).
## Warning: Removed 68 rows containing missing values (geom_point).

Continuous: Scatterplot of bmi_s1 versus neck20 with missing data

# Same scatter plot, but drawn with naniar::geom_miss_point() so that rows
# with missing data are shown instead of dropped by the point layer; only
# the linear-model smoother still discards them (see the warning below).
ggplot(shhs_data, aes(x = bmi_s1, y = neck20)) +
  naniar::geom_miss_point() +
  stat_smooth(method = "lm", se = FALSE)
## Warning: Removed 68 rows containing non-finite values (stat_smooth).