Regression

Author
Affiliations
John R Little

Duke University

Published

January 6, 2023

Load library packages

library(dplyr)
library(ggplot2)
#library(gapminder)
library(moderndive)
library(broom)

Data

The data are from the moderndive package, the companion to ModernDive by Ismay and Kim.

# Keep only the four columns used in the rest of this walkthrough.
evals_ch5 <- select(evals, ID, score, bty_avg, age)

# Print the full data set, then the four-column subset, then a per-column
# summary of the subset.
evals
evals_ch5
summary(evals_ch5)
       ID            score          bty_avg           age       
 Min.   :  1.0   Min.   :2.300   Min.   :1.667   Min.   :29.00  
 1st Qu.:116.5   1st Qu.:3.800   1st Qu.:3.167   1st Qu.:42.00  
 Median :232.0   Median :4.300   Median :4.333   Median :48.00  
 Mean   :232.0   Mean   :4.175   Mean   :4.418   Mean   :48.37  
 3rd Qu.:347.5   3rd Qu.:4.600   3rd Qu.:5.500   3rd Qu.:57.00  
 Max.   :463.0   Max.   :5.000   Max.   :8.167   Max.   :73.00  
# Summarise every column of evals_ch5 with skimr. The skimr:: prefix is used
# because skimr is not one of the packages attached by the library() calls
# at the top of this document.
skimr::skim(evals_ch5)
Data summary
Name evals_ch5
Number of rows 463
Number of columns 4
_______________________
Column type frequency:
numeric 4
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
ID 0 1 232.00 133.80 1.00 116.50 232.00 347.5 463.00 ▇▇▇▇▇
score 0 1 4.17 0.54 2.30 3.80 4.30 4.6 5.00 ▁▁▅▇▇
bty_avg 0 1 4.42 1.53 1.67 3.17 4.33 5.5 8.17 ▃▇▇▃▂
age 0 1 48.37 9.80 29.00 42.00 48.00 57.0 73.00 ▅▆▇▆▁
# Correlation between score and bty_avg (moderndive helper).
get_correlation(evals_ch5, formula = score ~ bty_avg)

Get correlation

# Base-R alternative to get_correlation(), shown on dplyr's starwars data.
# Rows with mass >= 500 (or missing mass) are dropped first -- presumably to
# exclude an extreme outlier; confirm the intended threshold.
# use = "complete.obs" stops cor() from returning NA when a retained row has a
# missing height, and the result gets an explicit column name instead of the
# raw expression text.
starwars %>%
  filter(mass < 500) %>%
  summarise(correlation = cor(mass, height, use = "complete.obs"))
# Per-column summary of the subset, repeated here for reference.
summary(evals_ch5)
       ID            score          bty_avg           age       
 Min.   :  1.0   Min.   :2.300   Min.   :1.667   Min.   :29.00  
 1st Qu.:116.5   1st Qu.:3.800   1st Qu.:3.167   1st Qu.:42.00  
 Median :232.0   Median :4.300   Median :4.333   Median :48.00  
 Mean   :232.0   Mean   :4.175   Mean   :4.418   Mean   :48.37  
 3rd Qu.:347.5   3rd Qu.:4.600   3rd Qu.:5.500   3rd Qu.:57.00  
 Max.   :463.0   Max.   :5.000   Max.   :8.167   Max.   :73.00  

weak correlation

# Jittered scatter plot of age (y) against score (x) with a fitted
# linear-model trend line.
ggplot(data = evals_ch5, mapping = aes(x = score, y = age)) +
  geom_jitter() +
  geom_smooth(method = "lm")
`geom_smooth()` using formula = 'y ~ x'

# Correlation between age and score, matching the axes of the plot above.
get_correlation(evals_ch5, formula = age ~ score)

summary model

For every 1-unit increase in bty_avg, there is an associated increase of, on average, 0.067 units of score (from ModernDive).

# Fit a simple linear regression of score on bty_avg.
score_model <- lm(formula = score ~ bty_avg, data = evals_ch5)

# Model-level fit statistics (broom), then the coefficient table (moderndive).
score_model %>% glance()
score_model %>% get_regression_table()

more

# Coefficient-level output: the moderndive wrapper, then broom's tidy().
score_model %>% moderndive::get_regression_table()
score_model %>% broom::tidy()
# Model-level output: the moderndive wrapper, then broom's glance().
score_model %>% moderndive::get_regression_summaries()
score_model %>% broom::glance()

More model data

# Observation-level output: the moderndive wrapper, then broom's augment().
score_model %>% moderndive::get_regression_points()
score_model %>% broom::augment()