{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Regression\n",
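"Ordinary least squares (OLS) regressions of `dist_cor` on `popsize`, `hostility`, `memsize` and `numskills`.\n",
"The input for this notebook is `netlogo_output/df_sum.csv`, produced as explained in `data_wrangling.md`."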
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# DEPENDENCIES\n",
"import itertools\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"import statsmodels.api as sm\n",
"# `scipy.stats.stats` is a deprecated private namespace; use the public module instead.\n",
"from scipy.stats import pearsonr\n",
"from sklearn import preprocessing\n",
"\n",
"# Global plot styling: seaborn's whitegrid theme and a default font size of 15.\n",
"sns.set_style('whitegrid')\n",
"plt.rcParams.update({'font.size': 15})"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" popsize | \n",
" hostility | \n",
" memsize | \n",
" numskills | \n",
" dist_cor | \n",
" p | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 100 | \n",
" 0.2 | \n",
" 1 | \n",
" 1 | \n",
" 0.007378 | \n",
" 4.465384e-03 | \n",
"
\n",
" \n",
" 1 | \n",
" 100 | \n",
" 0.2 | \n",
" 1 | \n",
" 21 | \n",
" 0.029851 | \n",
" 1.219621e-30 | \n",
"
\n",
" \n",
" 2 | \n",
" 100 | \n",
" 0.2 | \n",
" 1 | \n",
" 41 | \n",
" 0.026136 | \n",
" 7.196704e-24 | \n",
"
\n",
" \n",
" 3 | \n",
" 100 | \n",
" 0.2 | \n",
" 1 | \n",
" 61 | \n",
" 0.085331 | \n",
" 2.765941e-238 | \n",
"
\n",
" \n",
" 4 | \n",
" 100 | \n",
" 0.2 | \n",
" 1 | \n",
" 81 | \n",
" 0.017161 | \n",
" 3.748902e-11 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" popsize hostility memsize numskills dist_cor p\n",
"0 100 0.2 1 1 0.007378 4.465384e-03\n",
"1 100 0.2 1 21 0.029851 1.219621e-30\n",
"2 100 0.2 1 41 0.026136 7.196704e-24\n",
"3 100 0.2 1 61 0.085331 2.765941e-238\n",
"4 100 0.2 1 81 0.017161 3.748902e-11"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# IMPORT AND CLEAN DATA\n",
"# Read the summary table, then keep only complete rows whose correlation is significant.\n",
"df = (\n",
"    pd.read_csv(\"netlogo_output/df_sum.csv\")\n",
"    .dropna()  # can't have NaN's\n",
"    .loc[lambda frame: frame[\"p\"] < 0.05]  # can't have insignificant correlations\n",
")\n",
"\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OLS Regression Results\n",
"\n",
" Dep. Variable: | dist_cor | R-squared: | 0.016 | \n",
"
\n",
"\n",
" Model: | OLS | Adj. R-squared: | 0.014 | \n",
"
\n",
"\n",
" Method: | Least Squares | F-statistic: | 11.61 | \n",
"
\n",
"\n",
" Date: | Sat, 07 Nov 2020 | Prob (F-statistic): | 0.000692 | \n",
"
\n",
"\n",
" Time: | 13:56:02 | Log-Likelihood: | 143.50 | \n",
"
\n",
"\n",
" No. Observations: | 736 | AIC: | -283.0 | \n",
"
\n",
"\n",
" Df Residuals: | 734 | BIC: | -273.8 | \n",
"
\n",
"\n",
" Df Model: | 1 | | | \n",
"
\n",
"\n",
" Covariance Type: | nonrobust | | | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
"
\n",
"\n",
" const | 0.3635 | 0.015 | 24.195 | 0.000 | 0.334 | 0.393 | \n",
"
\n",
"\n",
" popsize | -8.872e-05 | 2.6e-05 | -3.407 | 0.001 | -0.000 | -3.76e-05 | \n",
"
\n",
"
\n",
"\n",
"\n",
" Omnibus: | 341.314 | Durbin-Watson: | 1.671 | \n",
"
\n",
"\n",
" Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 45.918 | \n",
"
\n",
"\n",
" Skew: | -0.223 | Prob(JB): | 1.07e-10 | \n",
"
\n",
"\n",
" Kurtosis: | 1.861 | Cond. No. | 1.18e+03 | \n",
"
\n",
"
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.18e+03. This might indicate that there are
strong multicollinearity or other numerical problems."
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: dist_cor R-squared: 0.016\n",
"Model: OLS Adj. R-squared: 0.014\n",
"Method: Least Squares F-statistic: 11.61\n",
"Date: Sat, 07 Nov 2020 Prob (F-statistic): 0.000692\n",
"Time: 13:56:02 Log-Likelihood: 143.50\n",
"No. Observations: 736 AIC: -283.0\n",
"Df Residuals: 734 BIC: -273.8\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"const 0.3635 0.015 24.195 0.000 0.334 0.393\n",
"popsize -8.872e-05 2.6e-05 -3.407 0.001 -0.000 -3.76e-05\n",
"==============================================================================\n",
"Omnibus: 341.314 Durbin-Watson: 1.671\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 45.918\n",
"Skew: -0.223 Prob(JB): 1.07e-10\n",
"Kurtosis: 1.861 Cond. No. 1.18e+03\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"[2] The condition number is large, 1.18e+03. This might indicate that there are\n",
"strong multicollinearity or other numerical problems.\n",
"\"\"\""
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Simple OLS: dist_cor (DV) regressed on popsize (IV).\n",
"y = df[\"dist_cor\"]\n",
"\n",
"# Add an intercept column. R^2 drops once a constant is included because statsmodels\n",
"# then reports the centered R^2 rather than the uncentered one.\n",
"X = sm.add_constant(df[\"popsize\"])\n",
"\n",
"# Fit by ordinary least squares and display the regression table.\n",
"model = sm.OLS(y, X).fit()\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OLS Regression Results\n",
"\n",
" Dep. Variable: | dist_cor | R-squared: | 0.017 | \n",
"
\n",
"\n",
" Model: | OLS | Adj. R-squared: | 0.016 | \n",
"
\n",
"\n",
" Method: | Least Squares | F-statistic: | 12.66 | \n",
"
\n",
"\n",
" Date: | Sat, 07 Nov 2020 | Prob (F-statistic): | 0.000397 | \n",
"
\n",
"\n",
" Time: | 13:56:07 | Log-Likelihood: | 144.02 | \n",
"
\n",
"\n",
" No. Observations: | 736 | AIC: | -284.0 | \n",
"
\n",
"\n",
" Df Residuals: | 734 | BIC: | -274.8 | \n",
"
\n",
"\n",
" Df Model: | 1 | | | \n",
"
\n",
"\n",
" Covariance Type: | nonrobust | | | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
"
\n",
"\n",
" const | 0.2807 | 0.013 | 21.587 | 0.000 | 0.255 | 0.306 | \n",
"
\n",
"\n",
" hostility | 0.0767 | 0.022 | 3.558 | 0.000 | 0.034 | 0.119 | \n",
"
\n",
"
\n",
"\n",
"\n",
" Omnibus: | 607.422 | Durbin-Watson: | 1.677 | \n",
"
\n",
"\n",
" Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 55.697 | \n",
"
\n",
"\n",
" Skew: | -0.287 | Prob(JB): | 8.04e-13 | \n",
"
\n",
"\n",
" Kurtosis: | 1.780 | Cond. No. | 3.74 | \n",
"
\n",
"
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: dist_cor R-squared: 0.017\n",
"Model: OLS Adj. R-squared: 0.016\n",
"Method: Least Squares F-statistic: 12.66\n",
"Date: Sat, 07 Nov 2020 Prob (F-statistic): 0.000397\n",
"Time: 13:56:07 Log-Likelihood: 144.02\n",
"No. Observations: 736 AIC: -284.0\n",
"Df Residuals: 734 BIC: -274.8\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"const 0.2807 0.013 21.587 0.000 0.255 0.306\n",
"hostility 0.0767 0.022 3.558 0.000 0.034 0.119\n",
"==============================================================================\n",
"Omnibus: 607.422 Durbin-Watson: 1.677\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 55.697\n",
"Skew: -0.287 Prob(JB): 8.04e-13\n",
"Kurtosis: 1.780 Cond. No. 3.74\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Simple OLS: dist_cor regressed on hostility, with an intercept.\n",
"y = df[\"dist_cor\"]\n",
"X = sm.add_constant(df[\"hostility\"])\n",
"model = sm.OLS(y, X).fit()\n",
"\n",
"# In-sample predictions for the same rows the model was fitted on.\n",
"predictions = model.predict(X)\n",
"\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OLS Regression Results\n",
"\n",
" Dep. Variable: | dist_cor | R-squared: | 0.101 | \n",
"
\n",
"\n",
" Model: | OLS | Adj. R-squared: | 0.100 | \n",
"
\n",
"\n",
" Method: | Least Squares | F-statistic: | 82.42 | \n",
"
\n",
"\n",
" Date: | Sat, 07 Nov 2020 | Prob (F-statistic): | 1.00e-18 | \n",
"
\n",
"\n",
" Time: | 13:56:10 | Log-Likelihood: | 176.88 | \n",
"
\n",
"\n",
" No. Observations: | 736 | AIC: | -349.8 | \n",
"
\n",
"\n",
" Df Residuals: | 734 | BIC: | -340.6 | \n",
"
\n",
"\n",
" Df Model: | 1 | | | \n",
"
\n",
"\n",
" Covariance Type: | nonrobust | | | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
"
\n",
"\n",
" const | 0.2242 | 0.013 | 17.834 | 0.000 | 0.200 | 0.249 | \n",
"
\n",
"\n",
" memsize | 0.0023 | 0.000 | 9.079 | 0.000 | 0.002 | 0.003 | \n",
"
\n",
"
\n",
"\n",
"\n",
" Omnibus: | 66.158 | Durbin-Watson: | 1.863 | \n",
"
\n",
"\n",
" Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 35.368 | \n",
"
\n",
"\n",
" Skew: | -0.378 | Prob(JB): | 2.09e-08 | \n",
"
\n",
"\n",
" Kurtosis: | 2.237 | Cond. No. | 90.0 | \n",
"
\n",
"
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: dist_cor R-squared: 0.101\n",
"Model: OLS Adj. R-squared: 0.100\n",
"Method: Least Squares F-statistic: 82.42\n",
"Date: Sat, 07 Nov 2020 Prob (F-statistic): 1.00e-18\n",
"Time: 13:56:10 Log-Likelihood: 176.88\n",
"No. Observations: 736 AIC: -349.8\n",
"Df Residuals: 734 BIC: -340.6\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"const 0.2242 0.013 17.834 0.000 0.200 0.249\n",
"memsize 0.0023 0.000 9.079 0.000 0.002 0.003\n",
"==============================================================================\n",
"Omnibus: 66.158 Durbin-Watson: 1.863\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 35.368\n",
"Skew: -0.378 Prob(JB): 2.09e-08\n",
"Kurtosis: 2.237 Cond. No. 90.0\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Simple OLS: dist_cor regressed on memsize, with an intercept.\n",
"y = df[\"dist_cor\"]\n",
"X = sm.add_constant(df[\"memsize\"])\n",
"\n",
"model = sm.OLS(endog=y, exog=X).fit()\n",
"predictions = model.predict(X)  # in-sample predictions\n",
"\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OLS Regression Results\n",
"\n",
" Dep. Variable: | dist_cor | R-squared: | 0.361 | \n",
"
\n",
"\n",
" Model: | OLS | Adj. R-squared: | 0.360 | \n",
"
\n",
"\n",
" Method: | Least Squares | F-statistic: | 415.3 | \n",
"
\n",
"\n",
" Date: | Sun, 20 Sep 2020 | Prob (F-statistic): | 1.68e-73 | \n",
"
\n",
"\n",
" Time: | 13:38:11 | Log-Likelihood: | 302.72 | \n",
"
\n",
"\n",
" No. Observations: | 736 | AIC: | -601.4 | \n",
"
\n",
"\n",
" Df Residuals: | 734 | BIC: | -592.2 | \n",
"
\n",
"\n",
" Df Model: | 1 | | | \n",
"
\n",
"\n",
" Covariance Type: | nonrobust | | | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
"
\n",
"\n",
" const | 0.1413 | 0.011 | 13.419 | 0.000 | 0.121 | 0.162 | \n",
"
\n",
"\n",
" numskills | 0.0043 | 0.000 | 20.378 | 0.000 | 0.004 | 0.005 | \n",
"
\n",
"
\n",
"\n",
"\n",
" Omnibus: | 26.185 | Durbin-Watson: | 1.147 | \n",
"
\n",
"\n",
" Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 14.224 | \n",
"
\n",
"\n",
" Skew: | 0.155 | Prob(JB): | 0.000815 | \n",
"
\n",
"\n",
" Kurtosis: | 2.394 | Cond. No. | 89.0 | \n",
"
\n",
"
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: dist_cor R-squared: 0.361\n",
"Model: OLS Adj. R-squared: 0.360\n",
"Method: Least Squares F-statistic: 415.3\n",
"Date: Sun, 20 Sep 2020 Prob (F-statistic): 1.68e-73\n",
"Time: 13:38:11 Log-Likelihood: 302.72\n",
"No. Observations: 736 AIC: -601.4\n",
"Df Residuals: 734 BIC: -592.2\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"const 0.1413 0.011 13.419 0.000 0.121 0.162\n",
"numskills 0.0043 0.000 20.378 0.000 0.004 0.005\n",
"==============================================================================\n",
"Omnibus: 26.185 Durbin-Watson: 1.147\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 14.224\n",
"Skew: 0.155 Prob(JB): 0.000815\n",
"Kurtosis: 2.394 Cond. No. 89.0\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Simple OLS: dist_cor regressed on numskills, with an intercept.\n",
"X = sm.add_constant(df[\"numskills\"])\n",
"y = df[\"dist_cor\"]\n",
"\n",
"# Fit the model, keep its in-sample predictions, and show the regression table.\n",
"model = sm.OLS(y, X).fit()\n",
"predictions = model.predict(X)\n",
"model.summary()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Multiple Linear Regression"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OLS Regression Results\n",
"\n",
" Dep. Variable: | dist_cor | R-squared: | 0.468 | \n",
"
\n",
"\n",
" Model: | OLS | Adj. R-squared: | 0.466 | \n",
"
\n",
"\n",
" Method: | Least Squares | F-statistic: | 322.1 | \n",
"
\n",
"\n",
" Date: | Sun, 20 Sep 2020 | Prob (F-statistic): | 4.16e-101 | \n",
"
\n",
"\n",
" Time: | 13:42:13 | Log-Likelihood: | 369.80 | \n",
"
\n",
"\n",
" No. Observations: | 736 | AIC: | -733.6 | \n",
"
\n",
"\n",
" Df Residuals: | 733 | BIC: | -719.8 | \n",
"
\n",
"\n",
" Df Model: | 2 | | | \n",
"
\n",
"\n",
" Covariance Type: | nonrobust | | | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
"
\n",
"\n",
" const | 0.0428 | 0.013 | 3.394 | 0.001 | 0.018 | 0.068 | \n",
"
\n",
"\n",
" numskills | 0.0043 | 0.000 | 22.476 | 0.000 | 0.004 | 0.005 | \n",
"
\n",
"\n",
" memsize | 0.0023 | 0.000 | 12.107 | 0.000 | 0.002 | 0.003 | \n",
"
\n",
"
\n",
"\n",
"\n",
" Omnibus: | 29.689 | Durbin-Watson: | 1.410 | \n",
"
\n",
"\n",
" Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 16.512 | \n",
"
\n",
"\n",
" Skew: | 0.195 | Prob(JB): | 0.000260 | \n",
"
\n",
"\n",
" Kurtosis: | 2.379 | Cond. No. | 152. | \n",
"
\n",
"
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: dist_cor R-squared: 0.468\n",
"Model: OLS Adj. R-squared: 0.466\n",
"Method: Least Squares F-statistic: 322.1\n",
"Date: Sun, 20 Sep 2020 Prob (F-statistic): 4.16e-101\n",
"Time: 13:42:13 Log-Likelihood: 369.80\n",
"No. Observations: 736 AIC: -733.6\n",
"Df Residuals: 733 BIC: -719.8\n",
"Df Model: 2 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"const 0.0428 0.013 3.394 0.001 0.018 0.068\n",
"numskills 0.0043 0.000 22.476 0.000 0.004 0.005\n",
"memsize 0.0023 0.000 12.107 0.000 0.002 0.003\n",
"==============================================================================\n",
"Omnibus: 29.689 Durbin-Watson: 1.410\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 16.512\n",
"Skew: 0.195 Prob(JB): 0.000260\n",
"Kurtosis: 2.379 Cond. No. 152.\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Two-predictor OLS: dist_cor regressed on numskills and memsize, with an intercept.\n",
"y = df[\"dist_cor\"]\n",
"X = sm.add_constant(df[[\"numskills\", \"memsize\"]])\n",
"\n",
"model = sm.OLS(y, X).fit()\n",
"\n",
"model.summary()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Full model"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OLS Regression Results\n",
"\n",
" Dep. Variable: | dist_cor | R-squared: | 0.497 | \n",
"
\n",
"\n",
" Model: | OLS | Adj. R-squared: | 0.494 | \n",
"
\n",
"\n",
" Method: | Least Squares | F-statistic: | 180.6 | \n",
"
\n",
"\n",
" Date: | Sun, 20 Sep 2020 | Prob (F-statistic): | 1.46e-107 | \n",
"
\n",
"\n",
" Time: | 14:17:09 | Log-Likelihood: | 390.65 | \n",
"
\n",
"\n",
" No. Observations: | 736 | AIC: | -771.3 | \n",
"
\n",
"\n",
" Df Residuals: | 731 | BIC: | -748.3 | \n",
"
\n",
"\n",
" Df Model: | 4 | | | \n",
"
\n",
"\n",
" Covariance Type: | nonrobust | | | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
"
\n",
"\n",
" const | 0.0509 | 0.017 | 2.943 | 0.003 | 0.017 | 0.085 | \n",
"
\n",
"\n",
" popsize | -8.504e-05 | 1.87e-05 | -4.559 | 0.000 | -0.000 | -4.84e-05 | \n",
"
\n",
"\n",
" hostility | 0.0722 | 0.015 | 4.668 | 0.000 | 0.042 | 0.103 | \n",
"
\n",
"\n",
" memsize | 0.0023 | 0.000 | 12.323 | 0.000 | 0.002 | 0.003 | \n",
"
\n",
"\n",
" numskills | 0.0043 | 0.000 | 23.049 | 0.000 | 0.004 | 0.005 | \n",
"
\n",
"
\n",
"\n",
"\n",
" Omnibus: | 28.997 | Durbin-Watson: | 1.489 | \n",
"
\n",
"\n",
" Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 13.951 | \n",
"
\n",
"\n",
" Skew: | 0.104 | Prob(JB): | 0.000935 | \n",
"
\n",
"\n",
" Kurtosis: | 2.358 | Cond. No. | 2.17e+03 | \n",
"
\n",
"
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.17e+03. This might indicate that there are
strong multicollinearity or other numerical problems."
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: dist_cor R-squared: 0.497\n",
"Model: OLS Adj. R-squared: 0.494\n",
"Method: Least Squares F-statistic: 180.6\n",
"Date: Sun, 20 Sep 2020 Prob (F-statistic): 1.46e-107\n",
"Time: 14:17:09 Log-Likelihood: 390.65\n",
"No. Observations: 736 AIC: -771.3\n",
"Df Residuals: 731 BIC: -748.3\n",
"Df Model: 4 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"const 0.0509 0.017 2.943 0.003 0.017 0.085\n",
"popsize -8.504e-05 1.87e-05 -4.559 0.000 -0.000 -4.84e-05\n",
"hostility 0.0722 0.015 4.668 0.000 0.042 0.103\n",
"memsize 0.0023 0.000 12.323 0.000 0.002 0.003\n",
"numskills 0.0043 0.000 23.049 0.000 0.004 0.005\n",
"==============================================================================\n",
"Omnibus: 28.997 Durbin-Watson: 1.489\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 13.951\n",
"Skew: 0.104 Prob(JB): 0.000935\n",
"Kurtosis: 2.358 Cond. No. 2.17e+03\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"[2] The condition number is large, 2.17e+03. This might indicate that there are\n",
"strong multicollinearity or other numerical problems.\n",
"\"\"\""
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Independent variables of the full model, in the order they appear in the regression table.\n",
"# Defined here so the cell runs on a fresh kernel (Restart & Run All).\n",
"IVs = [\"popsize\", \"hostility\", \"memsize\", \"numskills\"]\n",
"\n",
"# Full OLS model: dist_cor regressed on all four predictors, with an intercept.\n",
"X = sm.add_constant(df[IVs])\n",
"y = df[\"dist_cor\"]\n",
"\n",
"model = sm.OLS(y, X).fit()\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"@webio": {
"lastCommId": null,
"lastKernelId": null
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}