import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.pyplot as plt
import netCDF4 as nc
import pandas as pd
import os
import csv
from glob import glob
import xarray as xr
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import re
from datetime import datetime
from linearmodels.panel import PanelOLS
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed). from pandas.core import (
# Step 1: Data Preparation
# Load the soybean regression dataset into a pandas DataFrame.
# NOTE: the working directory is switched to a hard-coded, machine-specific
# path, so the relative CSV path below only resolves on that machine.
os.chdir('/Users/chenchenren/postdoc/paper/2N and water-US/regression/')
# Despite the variable name, this points at a CSV file, not an Excel workbook.
excel_file = "./soybean_reg_data.csv"
# Read the CSV file into a DataFrame
ds = pd.read_csv(excel_file)
# df_soybean is a second name for the same DataFrame object as ds (no copy is made)
df_soybean = ds
print(df_soybean)
geoid year tmp pre irrigation1 lnyield lnfer \ 0 1001.0 2009.0 24.140660 8.808839 0.718851 7.609614 2.022537 1 1001.0 2010.0 25.675484 5.438990 0.830087 7.035731 1.910005 2 1001.0 2011.0 24.544823 5.270162 0.595086 7.466514 2.025641 3 1001.0 2013.0 24.036592 7.514143 0.001241 7.926855 2.906756 4 1003.0 2008.0 24.727230 8.675528 0.001980 7.934111 1.240602 ... ... ... ... ... ... ... ... 17577 55141.0 2016.0 17.486904 7.304833 0.000000 8.134343 2.860158 17578 55141.0 2017.0 16.590237 6.467205 0.000000 7.978877 2.633171 17579 55141.0 2018.0 17.266659 8.373576 0.000000 7.838077 2.639034 17580 55141.0 2019.0 16.238592 8.735889 0.000000 7.912185 2.671287 17581 55141.0 2020.0 16.202635 6.832954 0.000000 8.173033 2.625827 group zone 0 1 5.0 1 1 5.0 2 1 5.0 3 1 5.0 4 1 5.0 ... ... ... 17577 2 1.0 17578 2 1.0 17579 2 1.0 17580 2 1.0 17581 2 1.0 [17582 rows x 9 columns]
#model 1
# Fixed-effects regression of lnyield on the group == 1 subset: quadratic
# irrigation, temperature and precipitation terms, a linear year trend and one
# dummy per geoid, with standard errors clustered by geoid.
#
# Take an explicit copy of the filtered rows. Adding columns to the bare
# boolean-mask slice is what raised pandas' SettingWithCopyWarning, because
# pandas cannot tell whether the write is meant to reach df_soybean.
data = df_soybean[df_soybean['group'] == 1].copy()
# Prepare the squared and interaction terms.
# (The lnfer_* interaction columns are not referenced by this model's formula.)
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
# Define the formula for the regression model; C(geoid) adds one dummy per geoid
formula = 'lnyield ~ lnfer + irrigation1 + irrigation12 + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +year + C(geoid)'
# Run the regression using the formula interface with standard errors clustered by geoid
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the coefficient table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary1 = summary.loc[~summary.index.str.contains(pattern)]
# A model with only the geoid dummies: its residuals are lnyield with the
# per-geoid mean removed, so their sum of squares is the "within" total
# sum of squares.
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()
# Within total sum of squares (TSS) from the fixed-effects-only model
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)
# Residual sum of squares (RSS) from the main model
rss_within = np.sum((model.resid) ** 2)
# Within R-squared: share of the within-geoid variation explained by the regressors
within_r2 = 1 - (rss_within / tss_within)
# Format the OLS summary header, including the within R-squared.
# NOTE(review): model.fvalue / model.f_pvalue test all regressors jointly,
# including every geoid dummy. statsmodels warns that the clustered covariance
# of those constraints is rank-deficient, and the recorded run printed a
# negative F-statistic, so treat these two header values as unreliable.
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {within_r2:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the header with the filtered coefficient table
final_summary1 = ols_info + "\n" + filtered_summary1.to_string()
# Print the final summary
print(final_summary1)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1287681286.py:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1287681286.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['irrigation12'] = data['irrigation1'] * data['irrigation1'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1287681286.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1287681286.py:9: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1287681286.py:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. 
Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1287681286.py:12: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1287681286.py:13: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp'] /Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1257, but rank is 8 warnings.warn('covariance of constraints does not have full '
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.737 Model: OLS Adj. R-squared: 0.684 Within R-squared: 0.315 Method: Least Squares F-statistic: -15082669839380.7 Date: Fri, 15 Nov 2024 Prob (F-statistic): 1.00e+00 Time: 11:18:00 Log-Likelihood: 3532.10 No. Observations: 7507 AIC: -4548.2 Df Residuals: 6249 BIC: 4161.7 Df Model: 1257 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept -38.522131 1.519082 -25.358830 7.181326e-142 -41.499476 -35.544786 lnfer 0.006019 0.010023 0.600552 5.481382e-01 -0.013625 0.025664 irrigation1 -0.015905 0.016186 -0.982618 3.257956e-01 -0.047630 0.015820 irrigation12 0.003699 0.001833 2.017882 4.360359e-02 0.000106 0.007293 tmp 0.521217 0.035106 14.847137 7.260372e-50 0.452411 0.590023 tmp_tmp_interaction -0.013340 0.000843 -15.832945 1.843839e-56 -0.014991 -0.011688 pre 0.227345 0.016284 13.961642 2.672075e-44 0.195430 0.259260 pre_pre_interaction -0.014297 0.001160 -12.326442 6.526549e-35 -0.016570 -0.012023 year 0.020107 0.000754 26.675887 8.965906e-157 0.018629 0.021584
#model 2
# Fixed-effects regression of lnyield on the group == 2 subset: quadratic
# temperature and precipitation terms, a linear year trend and one dummy per
# geoid, with standard errors clustered by geoid.
#
# Take an explicit copy of the filtered rows. Adding columns to the bare
# boolean-mask slice is what raised pandas' SettingWithCopyWarning.
data = df_soybean[df_soybean['group'] == 2].copy()
# Prepare the squared and interaction terms.
# (The lnfer_* interaction columns are not referenced by this model's formula.)
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
# Define the formula for the regression model; C(geoid) adds one dummy per geoid
formula = 'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +year+ C(geoid)'
# Run the regression using the formula interface with standard errors clustered by geoid.
# The same frame supplies both the regressors and the cluster labels.
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the coefficient table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary2 = summary.loc[~summary.index.str.contains(pattern)]
# Build the panel frame as a separate object instead of an alias of `data`.
# set_index moves 'year' into the (geoid, year) index, so a copy of it is kept
# as the 'trend' column to remain available as a regressor.
data1 = data.assign(trend=data['year']).set_index(['geoid', 'year'])
# Define the formula for the fixed effects regression model with PanelOLS
formula_fe = (
'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +trend+ EntityEffects '
)
# Fit the fixed effects model using PanelOLS.
# Only rsquared_within is read from this fit in this cell; the coefficient
# table above comes from the clustered OLS model.
fe_model = PanelOLS.from_formula(formula_fe, data=data1)
fe_results = fe_model.fit(cov_type='robust')
# Format the OLS summary header, including the within R-squared.
# NOTE(review): model.fvalue / model.f_pvalue test all regressors jointly,
# including every geoid dummy. statsmodels warns that the clustered covariance
# of those constraints is rank-deficient, and the recorded run printed a
# negative F-statistic, so treat these two header values as unreliable.
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {fe_results.rsquared_within:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the header with the filtered coefficient table
final_summary2 = ols_info + "\n" + filtered_summary2.to_string()
# Print the final summary
print(final_summary2)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2519090951.py:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2519090951.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_interaction'] = data['lnfer'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2519090951.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2519090951.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2519090951.py:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. 
Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2519090951.py:12: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp'] /Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1584, but rank is 6 warnings.warn('covariance of constraints does not have full ' /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2519090951.py:29: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data1['trend']=data1['year']
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.741 Model: OLS Adj. R-squared: 0.692 Within R-squared: 0.319 Method: Least Squares F-statistic: -4862979059989.5 Date: Fri, 15 Nov 2024 Prob (F-statistic): 1.00e+00 Time: 11:18:07 Log-Likelihood: 5641.18 No. Observations: 10075 AIC: -8112.4 Df Residuals: 8490 BIC: 3327.9 Df Model: 1584 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept -33.001536 1.073918 -30.730047 2.259444e-207 -35.106376 -30.896697 lnfer 0.002571 0.007563 0.340010 7.338487e-01 -0.012251 0.017394 tmp 0.286666 0.023890 11.999453 3.576503e-33 0.239842 0.333489 tmp_tmp_interaction -0.007424 0.000623 -11.923510 8.926738e-33 -0.008644 -0.006203 pre 0.180895 0.012406 14.581525 3.681989e-48 0.156580 0.205210 pre_pre_interaction -0.011249 0.000888 -12.667173 8.989585e-37 -0.012990 -0.009509 year 0.018752 0.000539 34.807651 1.863234e-265 0.017696 0.019808
#model 3
# Same specification as the group == 1 baseline, plus the two
# lnfer x irrigation x temperature interaction terms.
# Filter data for irrigated.
# Take an explicit copy of the filtered rows. Adding columns to the bare
# boolean-mask slice is what raised pandas' SettingWithCopyWarning.
data = df_soybean[df_soybean['group'] == 1].copy()
# Prepare the squared and interaction terms.
# (The two lnfer x irrigation x pre columns are not referenced by this model's formula.)
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
# Define the formula for the regression model; C(geoid) adds one dummy per geoid
formula = 'lnyield ~ lnfer + irrigation1 + irrigation12 + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +year+lnfer_irrigation1_tmp_interaction+ lnfer_irrigation12_tmp_tmp_interaction+ C(geoid)'
# Run the regression using the formula interface with standard errors clustered by geoid
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the coefficient table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary3 = summary.loc[~summary.index.str.contains(pattern)]
# A model with only the geoid dummies: its residuals are lnyield with the
# per-geoid mean removed, so their sum of squares is the "within" total
# sum of squares.
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()
# Within total sum of squares (TSS) from the fixed-effects-only model
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)
# Residual sum of squares (RSS) from the main model
rss_within = np.sum((model.resid) ** 2)
# Within R-squared: share of the within-geoid variation explained by the regressors
within_r2 = 1 - (rss_within / tss_within)
# Format the OLS summary header, including the within R-squared.
# NOTE(review): model.fvalue / model.f_pvalue test all regressors jointly,
# including every geoid dummy. statsmodels warns that the clustered covariance
# of those constraints is rank-deficient (and the recorded run printed an
# F-statistic of order 1e12), so treat these two header values as unreliable.
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {within_r2:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the header with the filtered coefficient table
final_summary3 = ols_info + "\n" + filtered_summary3.to_string()
# Print the final summary
print(final_summary3)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/748763980.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/748763980.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['irrigation12'] = data['irrigation1'] * data['irrigation1'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/748763980.py:9: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/748763980.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/748763980.py:12: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. 
Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/748763980.py:13: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/748763980.py:14: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp'] /Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1259, but rank is 10 warnings.warn('covariance of constraints does not have full '
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.737 Model: OLS Adj. R-squared: 0.684 Within R-squared: 0.316 Method: Least Squares F-statistic: 8776751685601.0 Date: Fri, 15 Nov 2024 Prob (F-statistic): 0.00e+00 Time: 11:18:13 Log-Likelihood: 3535.85 No. Observations: 7507 AIC: -4551.7 Df Residuals: 6247 BIC: 4172.0 Df Model: 1259 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept -38.321133 1.510202 -25.374835 4.782053e-142 -41.281075 -35.361191 lnfer 0.010810 0.009956 1.085828 2.775552e-01 -0.008703 0.030323 irrigation1 0.050252 0.045064 1.115135 2.647928e-01 -0.038071 0.138576 irrigation12 -0.005374 0.007057 -0.761398 4.464196e-01 -0.019206 0.008459 tmp 0.519718 0.034878 14.901235 3.235414e-50 0.451359 0.588077 tmp_tmp_interaction -0.013297 0.000836 -15.901556 6.181022e-57 -0.014936 -0.011658 pre 0.228471 0.016425 13.910163 5.495409e-44 0.196279 0.260663 pre_pre_interaction -0.014383 0.001171 -12.278522 1.181363e-34 -0.016678 -0.012087 year 0.020004 0.000750 26.659966 1.371681e-156 0.018534 0.021475 lnfer_irrigation1_tmp_interaction -0.001265 0.000770 -1.642240 1.005404e-01 -0.002775 0.000245 lnfer_irrigation12_tmp_tmp_interaction 0.000007 0.000005 1.344235 1.788725e-01 -0.000003 0.000017
#model 4
# Same specification as the group == 2 baseline, plus the two
# lnfer x temperature interaction terms.
# Filter data for rainfed.
# Take an explicit copy of the filtered rows. Adding columns to the bare
# boolean-mask slice is what raised pandas' SettingWithCopyWarning.
data = df_soybean[df_soybean['group'] == 2].copy()
# Prepare the squared and interaction terms.
# (The two lnfer x pre columns are not referenced by this model's formula.)
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
# Define the formula for the regression model; C(geoid) adds one dummy per geoid
formula = 'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +year+lnfer_tmp_interaction+ lnfer_tmp_tmp_interaction + C(geoid)'
# Run the regression using the formula interface with standard errors clustered by geoid
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the coefficient table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary4 = summary.loc[~summary.index.str.contains(pattern)]
# Build the panel frame as a separate object instead of an alias of `data`.
# set_index moves 'year' into the (geoid, year) index, so a copy of it is kept
# as the 'trend' column to remain available as a regressor.
data1 = data.assign(trend=data['year']).set_index(['geoid', 'year'])
# Define the formula for the fixed effects regression model with PanelOLS
formula_fe = (
'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +lnfer_tmp_interaction+ lnfer_tmp_tmp_interaction +trend+ EntityEffects '
)
# Fit the fixed effects model using PanelOLS.
# Only rsquared_within is read from this fit in this cell; the coefficient
# table above comes from the clustered OLS model.
fe_model = PanelOLS.from_formula(formula_fe, data=data1)
fe_results = fe_model.fit(cov_type='robust')
# Format the OLS summary header, including the within R-squared.
# NOTE(review): model.fvalue / model.f_pvalue test all regressors jointly,
# including every geoid dummy. statsmodels warns that the clustered covariance
# of those constraints is rank-deficient, and the recorded run printed a
# negative F-statistic, so treat these two header values as unreliable.
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {fe_results.rsquared_within:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the header with the filtered coefficient table
final_summary4 = ols_info + "\n" + filtered_summary4.to_string()
# Print the final summary
print(final_summary4)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/942356029.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/942356029.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_interaction'] = data['lnfer'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/942356029.py:9: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/942356029.py:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/942356029.py:12: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. 
Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/942356029.py:13: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.747 Model: OLS Adj. R-squared: 0.700 Within R-squared: 0.336 Method: Least Squares F-statistic: -1325714146266.6 Date: Fri, 15 Nov 2024 Prob (F-statistic): 1.00e+00 Time: 11:18:20 Log-Likelihood: 5766.72 No. Observations: 10075 AIC: -8359.4 Df Residuals: 8488 BIC: 3095.2 Df Model: 1586 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept -31.877339 1.130215 -28.204664 5.126040e-175 -34.092520 -29.662158 lnfer -1.439880 0.225001 -6.399437 1.559512e-10 -1.880874 -0.998886 tmp 0.141788 0.050195 2.824753 4.731705e-03 0.043408 0.240168 tmp_tmp_interaction -0.004595 0.001308 -3.513934 4.415227e-04 -0.007157 -0.002032 pre 0.173106 0.012165 14.230210 5.950317e-46 0.149264 0.196948 pre_pre_interaction -0.010701 0.000873 -12.251247 1.654332e-34 -0.012413 -0.008989 year 0.019085 0.000544 35.094109 8.290495e-270 0.018019 0.020151 lnfer_tmp_interaction 0.122043 0.022729 5.369542 7.893673e-08 0.077496 0.166591 lnfer_tmp_tmp_interaction -0.002460 0.000568 -4.334601 1.460244e-05 -0.003573 -0.001348
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1586, but rank is 8 warnings.warn('covariance of constraints does not have full ' /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/942356029.py:29: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data1['trend']=data1['year']
#model 5
# Filter data for irrigated (group == 1).
# Take an explicit copy: the derived columns below are then written to an
# independent frame instead of a slice of df_soybean, which is what raises
# pandas' SettingWithCopyWarning.
data = df_soybean[df_soybean['group'] == 1].copy()
# Prepare the squared and interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
# Define the formula for the regression model; C(geoid) adds one dummy per geoid
formula = 'lnyield ~ lnfer + irrigation1 + irrigation12 + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +year +lnfer_irrigation1_pre_interaction+ lnfer_irrigation12_pre_pre_interaction+ C(geoid)'
# Run the regression using the formula interface with standard errors clustered by geoid
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the coefficient table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary5 = summary.loc[~summary.index.str.contains(pattern)]
# Within R-squared: compare the full model's residual sum of squares with the
# residual sum of squares of a model that contains only the geoid dummies.
# NOTE(review): this assumes both fits use the same rows - confirm that no
# regressor has missing values that the formula interface would drop.
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()
# Total (within) sum of squares from the fixed-effects-only model
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)
# Residual sum of squares from the main model
rss_within = np.sum((model.resid) ** 2)
# Calculate the within R-squared
within_r2 = 1 - (rss_within / tss_within)
# Format the OLS summary header with the within R-squared
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {within_r2:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary5 = ols_info + "\n" + filtered_summary5.to_string()
# Print the final summary
print(final_summary5)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1023417680.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1023417680.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['irrigation12'] = data['irrigation1'] * data['irrigation1'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1023417680.py:9: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1023417680.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1023417680.py:12: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. 
Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1023417680.py:13: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1023417680.py:14: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp'] /Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1259, but rank is 10 warnings.warn('covariance of constraints does not have full '
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.738 Model: OLS Adj. R-squared: 0.685 Within R-squared: 0.317 Method: Least Squares F-statistic: 13218053090066.6 Date: Fri, 15 Nov 2024 Prob (F-statistic): 0.00e+00 Time: 11:18:26 Log-Likelihood: 3545.73 No. Observations: 7507 AIC: -4571.5 Df Residuals: 6247 BIC: 4152.3 Df Model: 1259 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept -38.308661 1.504098 -25.469528 4.290598e-143 -41.256639 -35.360684 lnfer 0.012705 0.010011 1.269066 2.044177e-01 -0.006917 0.032326 irrigation1 0.051396 0.027284 1.883720 5.960288e-02 -0.002080 0.104872 irrigation12 -0.000650 0.002281 -0.284937 7.756924e-01 -0.005122 0.003821 tmp 0.527072 0.035088 15.021570 5.303620e-51 0.458301 0.595842 tmp_tmp_interaction -0.013475 0.000842 -16.006810 1.145340e-57 -0.015125 -0.011825 pre 0.231881 0.016433 14.110700 3.263211e-45 0.199673 0.264089 pre_pre_interaction -0.014535 0.001168 -12.440248 1.580124e-35 -0.016825 -0.012245 year 0.019955 0.000744 26.809175 2.525887e-158 0.018497 0.021414 lnfer_irrigation1_pre_interaction -0.006095 0.001979 -3.080610 2.065769e-03 -0.009973 -0.002217 lnfer_irrigation12_pre_pre_interaction 0.000070 0.000033 2.084066 3.715416e-02 0.000004 0.000135
#model 6
# Filter data for rainfed (group == 2).
# Take an explicit copy: the derived columns below are then written to an
# independent frame instead of a slice of df_soybean, which is what raises
# pandas' SettingWithCopyWarning.
data = df_soybean[df_soybean['group'] == 2].copy()
# Prepare the squared and interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
# Define the formula for the regression model; C(geoid) adds one dummy per geoid
formula = 'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +year+lnfer_pre_interaction+ lnfer_pre_pre_interaction + C(geoid)'
# Run the regression using the formula interface with standard errors clustered by geoid
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the coefficient table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary6 = summary.loc[~summary.index.str.contains(pattern)]
# set_index moves 'year' out of the columns and into the index, so keep a
# copy of it as 'trend' to use as a regressor in the panel model.
data['trend'] = data['year']
# set_index returns a new frame, so data1 is independent of data
data1 = data.set_index(['geoid', 'year'])
# Define the formula for the fixed effects regression model with PanelOLS
formula_fe = (
    'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +lnfer_pre_interaction+ lnfer_pre_pre_interaction +trend+ EntityEffects '
)
# Fit the fixed effects model using PanelOLS; only its within R-squared is reported below
fe_model = PanelOLS.from_formula(formula_fe, data=data1)
fe_results = fe_model.fit(cov_type='robust')
# Format the OLS summary header with the within R-squared
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {fe_results.rsquared_within:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary6 = ols_info + "\n" + filtered_summary6.to_string()
# Print the final summary
print(final_summary6)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1781115856.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1781115856.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_interaction'] = data['lnfer'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1781115856.py:9: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1781115856.py:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1781115856.py:12: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. 
Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1781115856.py:13: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.742 Model: OLS Adj. R-squared: 0.694 Within R-squared: 0.323 Method: Least Squares F-statistic: -3726253873595.7 Date: Fri, 15 Nov 2024 Prob (F-statistic): 1.00e+00 Time: 11:18:32 Log-Likelihood: 5668.72 No. Observations: 10075 AIC: -8163.4 Df Residuals: 8488 BIC: 3291.2 Df Model: 1586 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept -32.265893 1.081751 -29.827472 1.720670e-195 -34.386086 -30.145701 lnfer -0.098089 0.044472 -2.205649 2.740859e-02 -0.185252 -0.010926 tmp 0.287700 0.024073 11.950925 6.420654e-33 0.240517 0.334883 tmp_tmp_interaction -0.007440 0.000627 -11.871190 1.670733e-32 -0.008668 -0.006211 pre 0.151558 0.022613 6.702369 2.050670e-11 0.107238 0.195878 pre_pre_interaction -0.009969 0.001658 -6.013411 1.816593e-09 -0.013218 -0.006720 year 0.018450 0.000541 34.116538 4.194062e-255 0.017390 0.019510 lnfer_pre_interaction 0.024954 0.012817 1.946900 5.154671e-02 -0.000167 0.050075 lnfer_pre_pre_interaction -0.001287 0.000892 -1.443681 1.488287e-01 -0.003035 0.000460
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1586, but rank is 8 warnings.warn('covariance of constraints does not have full ' /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1781115856.py:30: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data1['trend']=data1['year']
#model 7
# Filter data for irrigated (group == 1).
# Take an explicit copy: the derived columns below are then written to an
# independent frame instead of a slice of df_soybean, which is what raises
# pandas' SettingWithCopyWarning.
data = df_soybean[df_soybean['group'] == 1].copy()
# Prepare the squared and interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
# Define the formula for the regression model; C(geoid) adds one dummy per geoid
formula = 'lnyield ~ lnfer + irrigation1 + irrigation12 + tmp + tmp_tmp_interaction +year+ lnfer_irrigation1_tmp_interaction+ lnfer_irrigation12_tmp_tmp_interaction+lnfer_irrigation1_pre_interaction+ lnfer_irrigation12_pre_pre_interaction+ pre + pre_pre_interaction + C(geoid)'
# Run the regression using the formula interface with standard errors clustered by geoid
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the coefficient table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary7 = summary.loc[~summary.index.str.contains(pattern)]
# Within R-squared: compare the full model's residual sum of squares with the
# residual sum of squares of a model that contains only the geoid dummies.
# NOTE(review): this assumes both fits use the same rows - confirm that no
# regressor has missing values that the formula interface would drop.
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()
# Total (within) sum of squares from the fixed-effects-only model
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)
# Residual sum of squares from the main model
rss_within = np.sum((model.resid) ** 2)
# Calculate the within R-squared
within_r2 = 1 - (rss_within / tss_within)
# Format the OLS summary header with the within R-squared
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {within_r2:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary7 = ols_info + "\n" + filtered_summary7.to_string()
# Print the final summary
print(final_summary7)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1137327791.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1137327791.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['irrigation12'] = data['irrigation1'] * data['irrigation1'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1137327791.py:9: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1137327791.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1137327791.py:12: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. 
Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1137327791.py:13: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1137327791.py:14: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp'] /Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1261, but rank is 12 warnings.warn('covariance of constraints does not have full '
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.739 Model: OLS Adj. R-squared: 0.686 Within R-squared: 0.319 Method: Least Squares F-statistic: 6558362820053.0 Date: Fri, 15 Nov 2024 Prob (F-statistic): 0.00e+00 Time: 11:18:37 Log-Likelihood: 3554.06 No. Observations: 7507 AIC: -4584.1 Df Residuals: 6245 BIC: 4153.5 Df Model: 1261 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept -38.381111 1.506789 -25.472124 4.015698e-143 -41.334363 -35.427859 lnfer 0.010936 0.009941 1.100146 2.712687e-01 -0.008547 0.030420 irrigation1 0.024494 0.044406 0.551591 5.812284e-01 -0.062540 0.111528 irrigation12 -0.002255 0.006684 -0.337387 7.358255e-01 -0.015357 0.010846 tmp 0.538854 0.035216 15.301419 7.480764e-53 0.469832 0.607876 tmp_tmp_interaction -0.013766 0.000845 -16.292207 1.121090e-59 -0.015422 -0.012110 year 0.019934 0.000743 26.820682 1.854482e-158 0.018477 0.021391 lnfer_irrigation1_tmp_interaction 0.002105 0.001276 1.649260 9.909437e-02 -0.000397 0.004606 lnfer_irrigation12_tmp_tmp_interaction -0.000002 0.000006 -0.409528 6.821521e-01 -0.000013 0.000009 lnfer_irrigation1_pre_interaction -0.012644 0.003686 -3.430637 6.021650e-04 -0.019868 -0.005420 lnfer_irrigation12_pre_pre_interaction 0.000119 0.000041 2.929187 3.398503e-03 0.000040 0.000199 pre 0.233074 0.016260 14.334505 1.331920e-46 0.201206 0.264942 pre_pre_interaction -0.014487 0.001155 -12.542262 4.383037e-36 -0.016751 -0.012223
#model 8
# Filter data for rainfed (group == 2).
# Take an explicit copy: the derived columns below are then written to an
# independent frame instead of a slice of df_soybean, which is what raises
# pandas' SettingWithCopyWarning.
data = df_soybean[df_soybean['group'] == 2].copy()
# Prepare the squared and interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
# Define the formula for the regression model; C(geoid) adds one dummy per geoid
formula = 'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre +pre_pre_interaction+year+ lnfer_tmp_interaction+lnfer_tmp_tmp_interaction +lnfer_pre_interaction+ lnfer_pre_pre_interaction + C(geoid)'
# Run the regression using the formula interface with standard errors clustered by geoid
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the coefficient table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary8 = summary.loc[~summary.index.str.contains(pattern)]
# set_index moves 'year' out of the columns and into the index, so keep a
# copy of it as 'trend' to use as a regressor in the panel model.
data['trend'] = data['year']
# set_index returns a new frame, so data1 is independent of data
data1 = data.set_index(['geoid', 'year'])
# Define the formula for the fixed effects regression model with PanelOLS
formula_fe = (
    'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre +pre_pre_interaction+trend+ lnfer_tmp_interaction+lnfer_tmp_tmp_interaction +lnfer_pre_interaction+ lnfer_pre_pre_interaction + EntityEffects '
)
# Fit the fixed effects model using PanelOLS; only its within R-squared is reported below
fe_model = PanelOLS.from_formula(formula_fe, data=data1)
fe_results = fe_model.fit(cov_type='robust')
# Format the OLS summary header with the within R-squared
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {fe_results.rsquared_within:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary8 = ols_info + "\n" + filtered_summary8.to_string()
# Print the final summary
print(final_summary8)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/4225165077.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/4225165077.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_interaction'] = data['lnfer'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/4225165077.py:9: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/4225165077.py:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/4225165077.py:12: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. 
Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/4225165077.py:13: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.749 Model: OLS Adj. R-squared: 0.701 Within R-squared: 0.340 Method: Least Squares F-statistic: -1331501833556.8 Date: Fri, 15 Nov 2024 Prob (F-statistic): 1.00e+00 Time: 11:18:44 Log-Likelihood: 5793.48 No. Observations: 10075 AIC: -8409.0 Df Residuals: 8486 BIC: 3060.1 Df Model: 1588 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept -31.351247 1.147194 -27.328638 1.937995e-164 -33.599705 -29.102788 lnfer -1.437603 0.230179 -6.245593 4.221942e-10 -1.888745 -0.986461 tmp 0.162487 0.050122 3.241813 1.187719e-03 0.064249 0.260725 tmp_tmp_interaction -0.005111 0.001307 -3.910844 9.197420e-05 -0.007673 -0.002550 pre 0.137877 0.022308 6.180549 6.387919e-10 0.094154 0.181601 pre_pre_interaction -0.008889 0.001644 -5.407629 6.386448e-08 -0.012111 -0.005667 year 0.018801 0.000549 34.229622 8.768058e-257 0.017724 0.019877 lnfer_tmp_interaction 0.110635 0.022230 4.976855 6.462567e-07 0.067065 0.154204 lnfer_tmp_tmp_interaction -0.002177 0.000555 -3.926001 8.636975e-05 -0.003264 -0.001090 lnfer_pre_interaction 0.028565 0.012511 2.283232 2.241673e-02 0.004044 0.053085 lnfer_pre_pre_interaction -0.001607 0.000873 -1.841397 6.556340e-02 -0.003317 0.000103
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1588, but rank is 10 warnings.warn('covariance of constraints does not have full ' /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/4225165077.py:29: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data1['trend']=data1['year']
# Empty mapping that will collect one formatted result table per model
results_dict = dict()
def _format_rounded(value):
    """Return *value* rounded to three decimals, as text.

    A non-zero value that would round to zero is shown in scientific
    notation instead, so that small but significant estimates are not
    printed as "0.0".
    """
    rounded = round(value, 3)
    if rounded == 0 and value != 0:
        return f"{value:.2e}"
    return f"{rounded}"


def add_significance_asterisks(coef, p_value):
    """Format a coefficient and append significance stars.

    Stars are assigned from the p-value: ``***`` for p <= 0.01, ``**`` for
    p <= 0.05, ``*`` for p <= 0.1 and none otherwise.

    Args:
        coef: Coefficient estimate.
        p_value: p-value of the coefficient.

    Returns:
        The formatted coefficient followed by its stars, as a string.
    """
    # Thresholds are checked from strictest to loosest; the first match wins.
    for threshold, stars in ((0.01, "***"), (0.05, "**"), (0.1, "*")):
        if p_value <= threshold:
            return f"{_format_rounded(coef)}{stars}"
    return _format_rounded(coef)


# Define a function to process each final_summary DataFrame
def process_summary(summary, model_name):
    """Reshape a coefficient table into a long (Variable, Metric) table.

    Args:
        summary: DataFrame indexed by variable name with the columns
            'Coef.', 'Std.Err.' and 'P>|z|'.
        model_name: Label of the model; accepted for interface
            compatibility and not used in the result.

    Returns:
        DataFrame indexed by ('Variable', 'Metric') with a single 'Value'
        column holding, per variable, the starred coefficient, the
        standard error in parentheses and the rounded p-value.
    """
    structured_data = []
    for variable, row in summary.iterrows():
        # Coefficient with significance asterisks based on the p-value
        coef_with_significance = add_significance_asterisks(row['Coef.'], row['P>|z|'])
        # Standard error in parentheses
        std_err_with_parentheses = f"({_format_rounded(row['Std.Err.'])})"
        # Three rows per variable: coefficient, standard error, p-value
        structured_data.append((variable, 'Coef.', coef_with_significance))
        structured_data.append((variable, 'Std.Err.', std_err_with_parentheses))
        structured_data.append((variable, 'P>|z|', round(row['P>|z|'], 3)))
    # Convert to DataFrame and return
    return pd.DataFrame(structured_data, columns=['Variable', 'Metric', 'Value']).set_index(['Variable', 'Metric'])
# Filtered coefficient tables of the eight models, in model order
filtered_summaries = [filtered_summary1, filtered_summary2, filtered_summary3, filtered_summary4,
                      filtered_summary5, filtered_summary6, filtered_summary7, filtered_summary8]
# Process each summary and store it under its model label (Model_1 ... Model_8)
for i, summary in enumerate(filtered_summaries, start=1):
    model_label = f'Model_{i}'
    results_dict[model_label] = process_summary(summary, model_label)
# Concatenate all model results along columns; the dict keys become the
# top column level
final_results = pd.concat(results_dict, axis=1)
# Export to Excel. The path is held in one variable so the confirmation
# message below always names the file that was actually written.
output_path = "model_summary_table12.xlsx"
final_results.to_excel(output_path)
print(final_results)
print(f"Model summary statistics with significance have been saved to {output_path}")
Model_1 Model_2 \ Value Value Variable Metric Intercept Coef. -38.522*** -33.002*** Std.Err. (1.519) (1.074) P>|z| 0.0 0.0 lnfer Coef. 0.006 0.003 Std.Err. (0.01) (0.008) P>|z| 0.548 0.734 irrigation1 Coef. -0.016 NaN Std.Err. (0.016) NaN P>|z| 0.326 NaN irrigation12 Coef. 0.004** NaN Std.Err. (0.002) NaN P>|z| 0.044 NaN tmp Coef. 0.521*** 0.287*** Std.Err. (0.035) (0.024) P>|z| 0.0 0.0 tmp_tmp_interaction Coef. -0.013*** -0.007*** Std.Err. (0.001) (0.001) P>|z| 0.0 0.0 pre Coef. 0.227*** 0.181*** Std.Err. (0.016) (0.012) P>|z| 0.0 0.0 pre_pre_interaction Coef. -0.014*** -0.011*** Std.Err. (0.001) (0.001) P>|z| 0.0 0.0 year Coef. 0.02*** 0.019*** Std.Err. (0.001) (0.001) P>|z| 0.0 0.0 lnfer_irrigation1_tmp_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN lnfer_irrigation12_tmp_tmp_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN lnfer_tmp_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN lnfer_tmp_tmp_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN lnfer_irrigation1_pre_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN lnfer_irrigation12_pre_pre_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN lnfer_pre_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN lnfer_pre_pre_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN Model_3 Model_4 \ Value Value Variable Metric Intercept Coef. -38.321*** -31.877*** Std.Err. (1.51) (1.13) P>|z| 0.0 0.0 lnfer Coef. 0.011 -1.44*** Std.Err. (0.01) (0.225) P>|z| 0.278 0.0 irrigation1 Coef. 0.05 NaN Std.Err. (0.045) NaN P>|z| 0.265 NaN irrigation12 Coef. -0.005 NaN Std.Err. (0.007) NaN P>|z| 0.446 NaN tmp Coef. 0.52*** 0.142*** Std.Err. (0.035) (0.05) P>|z| 0.0 0.005 tmp_tmp_interaction Coef. -0.013*** -0.005*** Std.Err. (0.001) (0.001) P>|z| 0.0 0.0 pre Coef. 0.228*** 0.173*** Std.Err. (0.016) (0.012) P>|z| 0.0 0.0 pre_pre_interaction Coef. -0.014*** -0.011*** Std.Err. (0.001) (0.001) P>|z| 0.0 0.0 year Coef. 0.02*** 0.019*** Std.Err. 
(0.001) (0.001) P>|z| 0.0 0.0 lnfer_irrigation1_tmp_interaction Coef. -0.001 NaN Std.Err. (0.001) NaN P>|z| 0.101 NaN lnfer_irrigation12_tmp_tmp_interaction Coef. 0.0 NaN Std.Err. (0.0) NaN P>|z| 0.179 NaN lnfer_tmp_interaction Coef. NaN 0.122*** Std.Err. NaN (0.023) P>|z| NaN 0.0 lnfer_tmp_tmp_interaction Coef. NaN -0.002*** Std.Err. NaN (0.001) P>|z| NaN 0.0 lnfer_irrigation1_pre_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN lnfer_irrigation12_pre_pre_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN lnfer_pre_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN lnfer_pre_pre_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN Model_5 Model_6 \ Value Value Variable Metric Intercept Coef. -38.309*** -32.266*** Std.Err. (1.504) (1.082) P>|z| 0.0 0.0 lnfer Coef. 0.013 -0.098** Std.Err. (0.01) (0.044) P>|z| 0.204 0.027 irrigation1 Coef. 0.051* NaN Std.Err. (0.027) NaN P>|z| 0.06 NaN irrigation12 Coef. -0.001 NaN Std.Err. (0.002) NaN P>|z| 0.776 NaN tmp Coef. 0.527*** 0.288*** Std.Err. (0.035) (0.024) P>|z| 0.0 0.0 tmp_tmp_interaction Coef. -0.013*** -0.007*** Std.Err. (0.001) (0.001) P>|z| 0.0 0.0 pre Coef. 0.232*** 0.152*** Std.Err. (0.016) (0.023) P>|z| 0.0 0.0 pre_pre_interaction Coef. -0.015*** -0.01*** Std.Err. (0.001) (0.002) P>|z| 0.0 0.0 year Coef. 0.02*** 0.018*** Std.Err. (0.001) (0.001) P>|z| 0.0 0.0 lnfer_irrigation1_tmp_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN lnfer_irrigation12_tmp_tmp_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN lnfer_tmp_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN lnfer_tmp_tmp_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN lnfer_irrigation1_pre_interaction Coef. -0.006*** NaN Std.Err. (0.002) NaN P>|z| 0.002 NaN lnfer_irrigation12_pre_pre_interaction Coef. 0.0** NaN Std.Err. (0.0) NaN P>|z| 0.037 NaN lnfer_pre_interaction Coef. NaN 0.025* Std.Err. NaN (0.013) P>|z| NaN 0.052 lnfer_pre_pre_interaction Coef. NaN -0.001 Std.Err. 
NaN (0.001) P>|z| NaN 0.149 Model_7 Model_8 Value Value Variable Metric Intercept Coef. -38.381*** -31.351*** Std.Err. (1.507) (1.147) P>|z| 0.0 0.0 lnfer Coef. 0.011 -1.438*** Std.Err. (0.01) (0.23) P>|z| 0.271 0.0 irrigation1 Coef. 0.024 NaN Std.Err. (0.044) NaN P>|z| 0.581 NaN irrigation12 Coef. -0.002 NaN Std.Err. (0.007) NaN P>|z| 0.736 NaN tmp Coef. 0.539*** 0.162*** Std.Err. (0.035) (0.05) P>|z| 0.0 0.001 tmp_tmp_interaction Coef. -0.014*** -0.005*** Std.Err. (0.001) (0.001) P>|z| 0.0 0.0 pre Coef. 0.233*** 0.138*** Std.Err. (0.016) (0.022) P>|z| 0.0 0.0 pre_pre_interaction Coef. -0.014*** -0.009*** Std.Err. (0.001) (0.002) P>|z| 0.0 0.0 year Coef. 0.02*** 0.019*** Std.Err. (0.001) (0.001) P>|z| 0.0 0.0 lnfer_irrigation1_tmp_interaction Coef. 0.002* NaN Std.Err. (0.001) NaN P>|z| 0.099 NaN lnfer_irrigation12_tmp_tmp_interaction Coef. -0.0 NaN Std.Err. (0.0) NaN P>|z| 0.682 NaN lnfer_tmp_interaction Coef. NaN 0.111*** Std.Err. NaN (0.022) P>|z| NaN 0.0 lnfer_tmp_tmp_interaction Coef. NaN -0.002*** Std.Err. NaN (0.001) P>|z| NaN 0.0 lnfer_irrigation1_pre_interaction Coef. -0.013*** NaN Std.Err. (0.004) NaN P>|z| 0.001 NaN lnfer_irrigation12_pre_pre_interaction Coef. 0.0*** NaN Std.Err. (0.0) NaN P>|z| 0.003 NaN lnfer_pre_interaction Coef. NaN 0.029** Std.Err. NaN (0.013) P>|z| NaN 0.022 lnfer_pre_pre_interaction Coef. NaN -0.002* Std.Err. NaN (0.001) P>|z| NaN 0.066 Model summary statistics with significance have been saved to model_summary_statistics_with_significance.xlsx
#model 1
# Filter data for irrigated (group == 1).
# .copy() makes `data` an independent frame, so the column assignments below
# no longer operate on a slice of df_soybean (avoids SettingWithCopyWarning).
data = df_soybean[df_soybean['group'] == 1].copy()
# Prepare the squared and interaction terms (not all of them enter this model's formula)
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
data['year_geoid'] = data['year'] * data['geoid']
# Define the formula for the regression model; C(geoid) adds one dummy per geoid
formula = 'lnyield ~ lnfer + irrigation1 + irrigation12 + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +year + year_geoid+ C(geoid)'
# Run the regression using the formula interface with standard errors clustered by geoid
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the coefficient table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary1 = summary.loc[~summary.index.str.contains(pattern)]
# Reference model containing only the geoid dummies
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()
# Residual sum of squares of the dummies-only model: the variation left
# after the geoid dummies, used as the denominator of the within R-squared
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)
# Residual sum of squares of the main model
rss_within = np.sum((model.resid) ** 2)
# Within R-squared: share of the remaining variation explained by the regressors
within_r2 = 1 - (rss_within / tss_within)
# Format the OLS summary header, including the within R-squared
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {within_r2:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary1 = ols_info + "\n" + filtered_summary1.to_string()
# Print the final summary
print(final_summary1)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1789790976.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1789790976.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['irrigation12'] = data['irrigation1'] * data['irrigation1'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1789790976.py:9: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1789790976.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1789790976.py:12: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. 
Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1789790976.py:13: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1789790976.py:14: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1789790976.py:16: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['year_geoid'] = data['year'] * data['geoid'] /Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1258, but rank is 9 warnings.warn('covariance of constraints does not have full '
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.737 Model: OLS Adj. R-squared: 0.684 Within R-squared: 0.315 Method: Least Squares F-statistic: 231973491.2 Date: Fri, 15 Nov 2024 Prob (F-statistic): 0.00e+00 Time: 11:18:49 Log-Likelihood: 3532.31 No. Observations: 7507 AIC: -4546.6 Df Residuals: 6248 BIC: 4170.2 Df Model: 1258 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept -3.993741e+01 3.532505e+00 -11.305690 1.229830e-29 -4.686100e+01 -3.301383e+01 lnfer 6.212019e-03 1.003526e-02 0.619019 5.359038e-01 -1.345673e-02 2.588077e-02 irrigation1 -1.579472e-02 1.624024e-02 -0.972567 3.307686e-01 -4.762499e-02 1.603556e-02 irrigation12 3.696676e-03 1.836165e-03 2.013259 4.408741e-02 9.785812e-05 7.295494e-03 tmp 5.204956e-01 3.507404e-02 14.839911 8.086338e-50 4.517517e-01 5.892394e-01 tmp_tmp_interaction -1.331900e-02 8.415018e-04 -15.827655 2.005586e-56 -1.496831e-02 -1.166969e-02 pre 2.269818e-01 1.626871e-02 13.952047 3.057039e-44 1.950957e-01 2.588678e-01 pre_pre_interaction -1.425789e-02 1.160790e-03 -12.282920 1.118853e-34 -1.653300e-02 -1.198279e-02 year 2.083987e-02 1.813570e-03 11.491076 1.462790e-30 1.728534e-02 2.439440e-02 year_geoid -2.662822e-08 5.888926e-08 -0.452175 6.511433e-01 -1.420491e-07 8.879261e-08
#model 2
# Filter data for rainfed (group == 2).
# .copy() makes `data` an independent frame, so the column assignments below
# no longer operate on a slice of df_soybean (avoids SettingWithCopyWarning).
data = df_soybean[df_soybean['group'] == 2].copy()
# Prepare the squared and interaction terms (not all of them enter this model's formula)
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
data['year_geoid'] = data['year'] * data['geoid']
# Define the formula for the regression model; C(geoid) adds one dummy per geoid
formula = 'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +year + year_geoid+ C(geoid)'
# Run the regression using the formula interface with standard errors clustered by geoid
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the coefficient table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary2 = summary.loc[~summary.index.str.contains(pattern)]
# 'year' is moved into the index below, so keep a regular column ('trend')
# with the same values for use in the PanelOLS formula
data['trend'] = data['year']
# PanelOLS is given a (geoid, year) MultiIndex
data1 = data.set_index(['geoid', 'year'])
# Define the formula for the fixed effects regression model with PanelOLS
formula_fe = (
    'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +trend+ year_geoid+ EntityEffects '
)
# Fit the fixed effects model using PanelOLS; only its within R-squared is reported below
fe_model = PanelOLS.from_formula(formula_fe, data=data1)
fe_results = fe_model.fit(cov_type='robust')
# Format the OLS summary header, including the within R-squared from PanelOLS
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {fe_results.rsquared_within:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary2 = ols_info + "\n" + filtered_summary2.to_string()
# Print the final summary
print(final_summary2)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/124981403.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/124981403.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_interaction'] = data['lnfer'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/124981403.py:9: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/124981403.py:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/124981403.py:12: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. 
Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/124981403.py:13: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/124981403.py:14: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['year_geoid'] = data['year'] * data['geoid']
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.742 Model: OLS Adj. R-squared: 0.694 Within R-squared: 0.322 Method: Least Squares F-statistic: 10177080987.7 Date: Fri, 15 Nov 2024 Prob (F-statistic): 0.00e+00 Time: 11:18:54 Log-Likelihood: 5660.02 No. Observations: 10075 AIC: -8150.0 Df Residuals: 8490 BIC: 3290.2 Df Model: 1584 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept -4.615970e+01 2.443103e+00 -18.893877 1.280843e-79 -5.094809e+01 -4.137130e+01 lnfer 3.263710e-03 7.599801e-03 0.429447 6.675981e-01 -1.163163e-02 1.815905e-02 tmp 2.946470e-01 2.363305e-02 12.467583 1.121769e-35 2.483271e-01 3.409669e-01 tmp_tmp_interaction -7.618231e-03 6.167671e-04 -12.351876 4.758848e-35 -8.827073e-03 -6.409390e-03 pre 1.789964e-01 1.241803e-02 14.414236 4.210707e-47 1.546575e-01 2.033353e-01 pre_pre_interaction -1.109359e-02 8.885902e-04 -12.484486 9.072700e-36 -1.283520e-02 -9.351988e-03 year 2.546785e-02 1.258349e-03 20.239096 4.432263e-91 2.300153e-02 2.793417e-02 year_geoid -2.143577e-07 3.744462e-08 -5.724659 1.036415e-08 -2.877478e-07 -1.409676e-07
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1585, but rank is 7 warnings.warn('covariance of constraints does not have full ' /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/124981403.py:31: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data1['trend']=data1['year']
#model 3
# Filter data for irrigated (group == 1).
# .copy() makes `data` an independent frame, so the column assignments below
# no longer operate on a slice of df_soybean (avoids SettingWithCopyWarning).
data = df_soybean[df_soybean['group'] == 1].copy()
# Prepare the squared and interaction terms (not all of them enter this model's formula)
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
data['year_geoid'] = data['year'] * data['geoid']
# Define the formula for the regression model: the model 1 terms plus the
# fertilizer x irrigation x temperature interactions; C(geoid) adds one dummy per geoid
formula = 'lnyield ~ lnfer + irrigation1 + irrigation12 + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +year+lnfer_irrigation1_tmp_interaction+ lnfer_irrigation12_tmp_tmp_interaction+ year_geoid+ C(geoid)'
# Run the regression using the formula interface with standard errors clustered by geoid
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the coefficient table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary3 = summary.loc[~summary.index.str.contains(pattern)]
# Reference model containing only the geoid dummies
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()
# Residual sum of squares of the dummies-only model: the variation left
# after the geoid dummies, used as the denominator of the within R-squared
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)
# Residual sum of squares of the main model
rss_within = np.sum((model.resid) ** 2)
# Within R-squared: share of the remaining variation explained by the regressors
within_r2 = 1 - (rss_within / tss_within)
# Format the OLS summary header, including the within R-squared
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {within_r2:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary3 = ols_info + "\n" + filtered_summary3.to_string()
# Print the final summary
print(final_summary3)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3002633633.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3002633633.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['irrigation12'] = data['irrigation1'] * data['irrigation1'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3002633633.py:9: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3002633633.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3002633633.py:12: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. 
Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3002633633.py:13: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3002633633.py:14: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3002633633.py:15: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['year_geoid'] = data['year'] * data['geoid'] /Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1260, but rank is 11 warnings.warn('covariance of constraints does not have full '
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.737 Model: OLS Adj. R-squared: 0.684 Within R-squared: 0.316 Method: Least Squares F-statistic: 6605558130.0 Date: Fri, 15 Nov 2024 Prob (F-statistic): 0.00e+00 Time: 11:18:59 Log-Likelihood: 3536.22 No. Observations: 7507 AIC: -4550.4 Df Residuals: 6246 BIC: 4180.2 Df Model: 1260 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept -4.017184e+01 3.524811e+00 -11.396879 4.333771e-30 -4.708035e+01 -3.326334e+01 lnfer 1.117614e-02 9.967509e-03 1.121257 2.621785e-01 -8.359820e-03 3.071210e-02 irrigation1 5.200204e-02 4.505940e-02 1.154078 2.484683e-01 -3.631275e-02 1.403168e-01 irrigation12 -5.614136e-03 7.045565e-03 -0.796833 4.255483e-01 -1.942319e-02 8.194917e-03 tmp 5.187373e-01 3.482357e-02 14.896156 3.490943e-50 4.504844e-01 5.869902e-01 tmp_tmp_interaction -1.326847e-02 8.345532e-04 -15.898891 6.449685e-57 -1.490416e-02 -1.163278e-02 pre 2.280190e-01 1.639217e-02 13.910234 5.489920e-44 1.958909e-01 2.601470e-01 pre_pre_interaction -1.433377e-02 1.170808e-03 -12.242638 1.839547e-34 -1.662851e-02 -1.203903e-02 year 2.096292e-02 1.809563e-03 11.584521 4.937210e-31 1.741624e-02 2.450960e-02 lnfer_irrigation1_tmp_interaction -1.295372e-03 7.666021e-04 -1.689758 9.107423e-02 -2.797885e-03 2.071404e-04 lnfer_irrigation12_tmp_tmp_interaction 7.094800e-06 5.124475e-06 1.384493 1.662075e-01 -2.948985e-06 1.713859e-05 year_geoid -3.491327e-08 5.879304e-08 -0.593833 5.526236e-01 -1.501455e-07 8.031897e-08
#model 4
# Filter data for rainfed (group == 2).
# .copy() makes `data` an independent frame, so the column assignments below
# no longer operate on a slice of df_soybean (avoids SettingWithCopyWarning).
data = df_soybean[df_soybean['group'] == 2].copy()
# Prepare the squared and interaction terms (not all of them enter this model's formula)
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
data['year_geoid'] = data['year'] * data['geoid']
# Define the formula for the regression model: the model 2 terms plus the
# fertilizer x temperature interactions; C(geoid) adds one dummy per geoid
formula = 'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +year+ year_geoid +lnfer_tmp_interaction+ lnfer_tmp_tmp_interaction + C(geoid)'
# Run the regression using the formula interface with standard errors clustered by geoid
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the coefficient table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary4 = summary.loc[~summary.index.str.contains(pattern)]
# 'year' is moved into the index below, so keep a regular column ('trend')
# with the same values for use in the PanelOLS formula
data['trend'] = data['year']
# PanelOLS is given a (geoid, year) MultiIndex
data1 = data.set_index(['geoid', 'year'])
# Define the formula for the fixed effects regression model with PanelOLS
formula_fe = (
    'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +lnfer_tmp_interaction+ lnfer_tmp_tmp_interaction +trend+ year_geoid+ EntityEffects '
)
# Fit the fixed effects model using PanelOLS; only its within R-squared is reported below
fe_model = PanelOLS.from_formula(formula_fe, data=data1)
fe_results = fe_model.fit(cov_type='robust')
# Format the OLS summary header, including the within R-squared from PanelOLS
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {fe_results.rsquared_within:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary4 = ols_info + "\n" + filtered_summary4.to_string()
# Print the final summary
print(final_summary4)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3865254653.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3865254653.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_interaction'] = data['lnfer'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3865254653.py:9: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3865254653.py:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3865254653.py:12: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. 
Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3865254653.py:13: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3865254653.py:14: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['year_geoid'] = data['year'] * data['geoid']
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.748 Model: OLS Adj. R-squared: 0.701 Within R-squared: 0.338 Method: Least Squares F-statistic: 1586871134.5 Date: Fri, 15 Nov 2024 Prob (F-statistic): 0.00e+00 Time: 11:19:05 Log-Likelihood: 5776.80 No. Observations: 10075 AIC: -8379.6 Df Residuals: 8488 BIC: 3075.1 Df Model: 1586 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept -4.159007e+01 2.427799e+00 -17.130776 8.748001e-66 -4.634847e+01 -3.683168e+01 lnfer -1.348237e+00 2.249930e-01 -5.992351 2.068283e-09 -1.789216e+00 -9.072591e-01 tmp 1.606939e-01 4.986077e-02 3.222853 1.269207e-03 6.296861e-02 2.584192e-01 tmp_tmp_interaction -5.044027e-03 1.299541e-03 -3.881390 1.038610e-04 -7.591082e-03 -2.496973e-03 pre 1.720517e-01 1.217527e-02 14.131246 2.437921e-45 1.481886e-01 1.959148e-01 pre_pre_interaction -1.061143e-02 8.737040e-04 -12.145343 6.072751e-34 -1.232386e-02 -8.899006e-03 year 2.397302e-02 1.223615e-03 19.591973 1.810374e-85 2.157478e-02 2.637126e-02 year_geoid -1.563708e-07 3.726879e-08 -4.195757 2.719611e-05 -2.294163e-07 -8.332531e-08 lnfer_tmp_interaction 1.134965e-01 2.271550e-02 4.996433 5.840036e-07 6.897492e-02 1.580180e-01 lnfer_tmp_tmp_interaction -2.264421e-03 5.672504e-04 -3.991926 6.553891e-05 -3.376212e-03 -1.152631e-03
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1587, but rank is 9 warnings.warn('covariance of constraints does not have full ' /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3865254653.py:31: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data1['trend']=data1['year']
#model 5
# Filter data for irrigated
# Take an explicit copy of the filtered rows: assigning new columns to a
# boolean-mask slice of df_soybean is what raised SettingWithCopyWarning.
data = df_soybean[df_soybean['group'] == 1].copy()
# Prepare the interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
# The two lnfer x irrigation x tmp columns below are built but are not
# referenced by this model's formula (they are kept so `data` has the same
# columns as before).
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
# NOTE(review): this multiplies the year by the numeric geoid code; confirm
# that this is the intended county-trend specification.
data['year_geoid'] = data['year'] * data['geoid']
# Define the formula for the regression model (C(geoid) adds one dummy per county)
formula = 'lnyield ~ lnfer + irrigation1 + irrigation12 + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +year + year_geoid+lnfer_irrigation1_pre_interaction+ lnfer_irrigation12_pre_pre_interaction+ C(geoid)'
# Run the regression using the formula interface with standard errors clustered by geoid
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the coefficient table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary5 = summary.loc[~summary.index.str.contains(pattern)]
# Within R-squared: compare the residuals of the full model against those of
# a model that contains only the geoid fixed effects.
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()
# Sum of squared residuals left after removing the fixed effects only
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)
# Residual sum of squares from the main model
rss_within = np.sum((model.resid) ** 2)
within_r2 = 1 - (rss_within / tss_within)
# Format the OLS summary with the within R-squared
# NOTE(review): statsmodels warns that the covariance of constraints is not
# full rank for this clustered fit, and the F-statistic printed by a previous
# run was negative; treat the F-statistic / Prob (F-statistic) lines as
# unreliable until a test on the non-dummy terms replaces them.
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {within_r2:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary5 = ols_info + "\n" + filtered_summary5.to_string()
# Print the final summary
print(final_summary5)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/831502823.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/831502823.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['irrigation12'] = data['irrigation1'] * data['irrigation1'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/831502823.py:9: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/831502823.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/831502823.py:12: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. 
Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/831502823.py:13: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/831502823.py:14: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/831502823.py:15: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['year_geoid'] = data['year'] * data['geoid'] /Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1260, but rank is 11 warnings.warn('covariance of constraints does not have full '
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.738 Model: OLS Adj. R-squared: 0.685 Within R-squared: 0.317 Method: Least Squares F-statistic: -1021640359.7 Date: Fri, 15 Nov 2024 Prob (F-statistic): 1.00e+00 Time: 11:19:10 Log-Likelihood: 3546.14 No. Observations: 7507 AIC: -4570.3 Df Residuals: 6246 BIC: 4160.4 Df Model: 1260 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept -4.025514e+01 3.530404e+00 -11.402419 4.066618e-30 -4.717460e+01 -3.333568e+01 lnfer 1.303281e-02 1.003026e-02 1.299349 1.938242e-01 -6.626142e-03 3.269176e-02 irrigation1 5.221407e-02 2.742767e-02 1.903701 5.694919e-02 -1.543174e-03 1.059713e-01 irrigation12 -7.103953e-04 2.290441e-03 -0.310157 7.564419e-01 -5.199576e-03 3.778786e-03 tmp 5.261096e-01 3.503865e-02 15.015122 5.845459e-51 4.574351e-01 5.947841e-01 tmp_tmp_interaction -1.344705e-02 8.403977e-04 -16.000815 1.261141e-57 -1.509420e-02 -1.179990e-02 pre 2.314217e-01 1.639455e-02 14.115773 3.036694e-45 1.992890e-01 2.635544e-01 pre_pre_interaction -1.448400e-02 1.167307e-03 -12.408039 2.363713e-35 -1.677188e-02 -1.219612e-02 year 2.096354e-02 1.811326e-03 11.573585 5.608911e-31 1.741340e-02 2.451367e-02 year_geoid -3.666940e-08 5.900941e-08 -0.621416 5.343259e-01 -1.523257e-07 7.898692e-08 lnfer_irrigation1_pre_interaction -6.158405e-03 1.981774e-03 -3.107522 1.886631e-03 -1.004261e-02 -2.274200e-03 lnfer_irrigation12_pre_pre_interaction 7.054057e-05 3.345749e-05 2.108364 3.499950e-02 4.965096e-06 1.361160e-04
#model 6
# Filter data for rainfed
# Take an explicit copy of the filtered rows: assigning new columns to a
# boolean-mask slice of df_soybean is what raised SettingWithCopyWarning.
data = df_soybean[df_soybean['group'] == 2].copy()
# Prepare the interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
# The two lnfer x tmp columns below are built but are not referenced by this
# model's formulas (they are kept so `data` has the same columns as before).
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
# NOTE(review): this multiplies the year by the numeric geoid code; confirm
# that this is the intended county-trend specification.
data['year_geoid'] = data['year'] * data['geoid']
# Define the formula for the regression model (C(geoid) adds one dummy per county)
formula = 'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +year+ year_geoid+lnfer_pre_interaction+ lnfer_pre_pre_interaction + C(geoid)'
# Run the regression using the formula interface with standard errors clustered by geoid
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the coefficient table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary6 = summary.loc[~summary.index.str.contains(pattern)]
# 'trend' duplicates 'year' as a regressor, because 'year' itself becomes
# part of the index of the panel frame built on the next line.
data['trend'] = data['year']
# Panel frame indexed by (geoid, year) for PanelOLS
data1 = data.set_index(['geoid', 'year'])
# Define the formula for the fixed effects regression model with PanelOLS
formula_fe = (
'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +lnfer_pre_interaction+ lnfer_pre_pre_interaction +trend+ year_geoid+ EntityEffects '
)
# Fit the fixed effects model using PanelOLS; only its within R-squared is used below
fe_model = PanelOLS.from_formula(formula_fe, data=data1)
fe_results = fe_model.fit(cov_type='robust')
# Format the OLS summary with the within R-squared
# NOTE(review): statsmodels warns that the covariance of constraints is not
# full rank for this clustered fit, and the F-statistic printed by a previous
# run was negative; treat the F-statistic / Prob (F-statistic) lines as
# unreliable until a test on the non-dummy terms replaces them.
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {fe_results.rsquared_within:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary6 = ols_info + "\n" + filtered_summary6.to_string()
# Print the final summary
print(final_summary6)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2586232414.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2586232414.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_interaction'] = data['lnfer'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2586232414.py:9: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2586232414.py:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2586232414.py:12: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. 
Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2586232414.py:13: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2586232414.py:14: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['year_geoid'] = data['year'] * data['geoid']
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.743 Model: OLS Adj. R-squared: 0.695 Within R-squared: 0.325 Method: Least Squares F-statistic: -34298358.6 Date: Fri, 15 Nov 2024 Prob (F-statistic): 1.00e+00 Time: 11:19:15 Log-Likelihood: 5684.24 No. Observations: 10075 AIC: -8194.5 Df Residuals: 8488 BIC: 3260.2 Df Model: 1586 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept -4.426475e+01 2.442425e+00 -18.123281 2.087673e-73 -4.905181e+01 -3.947768e+01 lnfer -9.289319e-02 4.463449e-02 -2.081198 3.741583e-02 -1.803752e-01 -5.411199e-03 tmp 2.948238e-01 2.382286e-02 12.375668 3.539384e-35 2.481319e-01 3.415158e-01 tmp_tmp_interaction -7.614167e-03 6.208992e-04 -12.263129 1.428749e-34 -8.831107e-03 -6.397227e-03 pre 1.507325e-01 2.264502e-02 6.656321 2.807658e-11 1.063491e-01 1.951159e-01 pre_pre_interaction -9.830327e-03 1.661822e-03 -5.915393 3.310851e-09 -1.308744e-02 -6.573217e-03 year 2.457080e-02 1.256239e-03 19.559016 3.456824e-85 2.210861e-02 2.703298e-02 year_geoid -1.947294e-07 3.728484e-08 -5.222749 1.762859e-07 -2.678063e-07 -1.216524e-07 lnfer_pre_interaction 2.398136e-02 1.284979e-02 1.866284 6.200158e-02 -1.203762e-03 4.916648e-02 lnfer_pre_pre_interaction -1.254279e-03 8.938748e-04 -1.403193 1.605592e-01 -3.006242e-03 4.976832e-04
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1587, but rank is 9 warnings.warn('covariance of constraints does not have full ' /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2586232414.py:30: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data1['trend']=data1['year']
#model 7
# Filter data for irrigated
# Take an explicit copy of the filtered rows: assigning new columns to a
# boolean-mask slice of df_soybean is what raised SettingWithCopyWarning.
data = df_soybean[df_soybean['group'] == 1].copy()
# Prepare the interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
# NOTE(review): this multiplies the year by the numeric geoid code; confirm
# that this is the intended county-trend specification.
data['year_geoid'] = data['year'] * data['geoid']
# Define the formula for the regression model (C(geoid) adds one dummy per county)
formula = 'lnyield ~ lnfer + irrigation1 + irrigation12 + tmp + tmp_tmp_interaction +year+ year_geoid+ lnfer_irrigation1_tmp_interaction+ lnfer_irrigation12_tmp_tmp_interaction+lnfer_irrigation1_pre_interaction+ lnfer_irrigation12_pre_pre_interaction+ pre + pre_pre_interaction + C(geoid)'
# Run the regression using the formula interface with standard errors clustered by geoid
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the coefficient table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary7 = summary.loc[~summary.index.str.contains(pattern)]
# Within R-squared: compare the residuals of the full model against those of
# a model that contains only the geoid fixed effects.
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()
# Sum of squared residuals left after removing the fixed effects only
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)
# Residual sum of squares from the main model
rss_within = np.sum((model.resid) ** 2)
within_r2 = 1 - (rss_within / tss_within)
# Format the OLS summary with the within R-squared
# NOTE(review): statsmodels warns that the covariance of constraints is not
# full rank for this clustered fit, and the F-statistic printed by a previous
# run was negative; treat the F-statistic / Prob (F-statistic) lines as
# unreliable until a test on the non-dummy terms replaces them.
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {within_r2:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary7 = ols_info + "\n" + filtered_summary7.to_string()
# Print the final summary
print(final_summary7)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2551732332.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2551732332.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['irrigation12'] = data['irrigation1'] * data['irrigation1'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2551732332.py:9: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2551732332.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2551732332.py:12: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. 
Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2551732332.py:13: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2551732332.py:14: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2551732332.py:15: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['year_geoid'] = data['year'] * data['geoid'] /Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1262, but rank is 13 warnings.warn('covariance of constraints does not have full '
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.739 Model: OLS Adj. R-squared: 0.686 Within R-squared: 0.319 Method: Least Squares F-statistic: -111784034.6 Date: Fri, 15 Nov 2024 Prob (F-statistic): 1.00e+00 Time: 11:19:20 Log-Likelihood: 3554.34 No. Observations: 7507 AIC: -4582.7 Df Residuals: 6244 BIC: 4161.8 Df Model: 1262 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept -3.999831e+01 3.516756e+00 -11.373639 5.657834e-30 -4.689103e+01 -3.310560e+01 lnfer 1.125907e-02 9.948052e-03 1.131786 2.577243e-01 -8.238755e-03 3.075689e-02 irrigation1 2.609614e-02 4.440535e-02 0.587680 5.567470e-01 -6.093674e-02 1.131290e-01 irrigation12 -2.465586e-03 6.676493e-03 -0.369294 7.119090e-01 -1.555127e-02 1.062010e-02 tmp 5.379418e-01 3.513004e-02 15.312872 6.273192e-53 4.690882e-01 6.067954e-01 tmp_tmp_interaction -1.374014e-02 8.423301e-04 -16.312058 8.101710e-60 -1.539107e-02 -1.208920e-02 year 2.077213e-02 1.798938e-03 11.546882 7.654774e-31 1.724627e-02 2.429798e-02 year_geoid -3.051634e-08 5.854957e-08 -0.521205 6.022238e-01 -1.452714e-07 8.423870e-08 lnfer_irrigation1_tmp_interaction 2.071831e-03 1.269940e-03 1.631440 1.027975e-01 -4.172060e-04 4.560868e-03 lnfer_irrigation12_tmp_tmp_interaction -2.118492e-06 5.544439e-06 -0.382093 7.023923e-01 -1.298539e-05 8.748409e-06 lnfer_irrigation1_pre_interaction -1.262525e-02 3.683210e-03 -3.427785 6.085279e-04 -1.984421e-02 -5.406292e-03 lnfer_irrigation12_pre_pre_interaction 1.195169e-04 4.084788e-05 2.925902 3.434588e-03 3.945653e-05 1.995773e-04 pre 2.326726e-01 1.622412e-02 14.341152 1.210295e-46 2.008739e-01 2.644713e-01 pre_pre_interaction -1.444476e-02 1.154386e-03 -12.512941 6.343126e-36 -1.670732e-02 -1.218221e-02
#model 8
# Filter data for rainfed
# Take an explicit copy of the filtered rows: assigning new columns to a
# boolean-mask slice of df_soybean is what raised SettingWithCopyWarning.
data = df_soybean[df_soybean['group'] == 2].copy()
# Prepare the interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
# NOTE(review): this multiplies the year by the numeric geoid code; confirm
# that this is the intended county-trend specification.
data['year_geoid'] = data['year'] * data['geoid']
# Define the formula for the regression model (C(geoid) adds one dummy per county)
formula = 'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre +pre_pre_interaction+year+ year_geoid+ lnfer_tmp_interaction+lnfer_tmp_tmp_interaction +lnfer_pre_interaction+ lnfer_pre_pre_interaction + C(geoid)'
# Run the regression using the formula interface with standard errors clustered by geoid
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the coefficient table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary8 = summary.loc[~summary.index.str.contains(pattern)]
# 'trend' duplicates 'year' as a regressor, because 'year' itself becomes
# part of the index of the panel frame built on the next line.
data['trend'] = data['year']
# Panel frame indexed by (geoid, year) for PanelOLS
data1 = data.set_index(['geoid', 'year'])
# Define the formula for the fixed effects regression model with PanelOLS
formula_fe = (
'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre +pre_pre_interaction+trend+ year_geoid+ lnfer_tmp_interaction+lnfer_tmp_tmp_interaction +lnfer_pre_interaction+ lnfer_pre_pre_interaction + EntityEffects '
)
# Fit the fixed effects model using PanelOLS; only its within R-squared is used below
fe_model = PanelOLS.from_formula(formula_fe, data=data1)
fe_results = fe_model.fit(cov_type='robust')
# Format the OLS summary with the within R-squared
# NOTE(review): statsmodels warned that the covariance of constraints is not
# full rank for the other clustered fits in this notebook, and their printed
# F-statistics were negative; the same likely applies here - verify before
# relying on the F-statistic / Prob (F-statistic) lines.
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {fe_results.rsquared_within:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary8 = ols_info + "\n" + filtered_summary8.to_string()
# Print the final summary
print(final_summary8)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3559849126.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3559849126.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_interaction'] = data['lnfer'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3559849126.py:9: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3559849126.py:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3559849126.py:12: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. 
Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3559849126.py:13: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3559849126.py:14: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['year_geoid'] = data['year'] * data['geoid']
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.749 Model: OLS Adj. R-squared: 0.702 Within R-squared: 0.341 Method: Least Squares F-statistic: 31074990.5 Date: Fri, 15 Nov 2024 Prob (F-statistic): 0.00e+00 Time: 11:19:26 Log-Likelihood: 5801.49 No. Observations: 10075 AIC: -8425.0 Df Residuals: 8486 BIC: 3044.1 Df Model: 1588 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept -4.002823e+01 2.438288e+00 -16.416530 1.456612e-60 -4.480719e+01 -3.524927e+01 lnfer -1.357485e+00 2.308549e-01 -5.880252 4.096416e-09 -1.809952e+00 -9.050177e-01 tmp 1.784050e-01 4.987598e-02 3.576971 3.475981e-04 8.064984e-02 2.761601e-01 tmp_tmp_interaction -5.489017e-03 1.301083e-03 -4.218805 2.456000e-05 -8.039093e-03 -2.938941e-03 pre 1.378057e-01 2.232611e-02 6.172401 6.726077e-10 9.404731e-02 1.815640e-01 pre_pre_interaction -8.837721e-03 1.645483e-03 -5.370898 7.834573e-08 -1.206281e-02 -5.612633e-03 year 2.316980e-02 1.224421e-03 18.923059 7.365373e-80 2.076998e-02 2.556962e-02 year_geoid -1.393738e-07 3.716933e-08 -3.749700 1.770463e-04 -2.122244e-07 -6.652328e-08 lnfer_tmp_interaction 1.035174e-01 2.225610e-02 4.651192 3.300223e-06 5.989624e-02 1.471385e-01 lnfer_tmp_tmp_interaction -2.014753e-03 5.551371e-04 -3.629290 2.842020e-04 -3.102802e-03 -9.267047e-04 lnfer_pre_interaction 2.776295e-02 1.254637e-02 2.212827 2.690960e-02 3.172509e-03 5.235339e-02 lnfer_pre_pre_interaction -1.569957e-03 8.749615e-04 -1.794315 7.276285e-02 -3.284850e-03 1.449362e-04
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1589, but rank is 11 warnings.warn('covariance of constraints does not have full ' /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3559849126.py:31: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data1['trend']=data1['year']
# Registry of per-model result tables. Keys are labels such as 'Model_1' and
# values are the DataFrames returned by process_summary; it is filled by the
# loop over filtered_summaries further down and then handed to pd.concat.
results_dict = {}
def add_significance_asterisks(coef, p_value):
    """Return *coef* rounded to 3 decimals as text, suffixed with significance stars.

    Stars are chosen from the p-value: ``***`` for p <= 0.01, ``**`` for
    p <= 0.05, ``*`` for p <= 0.1, and no suffix otherwise (which includes a
    NaN p-value, since every comparison against NaN is false).
    """
    # Ordered from strictest to loosest so the first match wins.
    levels = ((0.01, "***"), (0.05, "**"), (0.1, "*"))
    stars = next((mark for cutoff, mark in levels if p_value <= cutoff), "")
    return f"{round(coef, 3)}{stars}"
# Reshape one coefficient table into a long (Variable, Metric) -> Value table
def process_summary(summary, model_name):
    """Convert a coefficient table into a long-format, two-level-indexed frame.

    Parameters
    ----------
    summary : pandas.DataFrame
        One row per regressor, with the columns 'Coef.', 'Std.Err.' and
        'P>|z|'.
    model_name : str
        Label of the model; accepted for the caller's convenience but not
        used inside this function.

    Returns
    -------
    pandas.DataFrame
        A single 'Value' column indexed by ('Variable', 'Metric'). Each
        regressor contributes three rows: the starred coefficient, the
        standard error in parentheses, and the p-value rounded to 3 decimals.
    """
    def _rows_for(variable, stats):
        # The three output rows belonging to a single regressor.
        p_value = stats['P>|z|']
        return [
            (variable, 'Coef.', add_significance_asterisks(stats['Coef.'], p_value)),
            (variable, 'Std.Err.', f"({round(stats['Std.Err.'], 3)})"),
            (variable, 'P>|z|', round(p_value, 3)),
        ]

    records = []
    for variable, stats in summary.iterrows():
        records.extend(_rows_for(variable, stats))

    long_table = pd.DataFrame(records, columns=['Variable', 'Metric', 'Value'])
    return long_table.set_index(['Variable', 'Metric'])
# Filtered coefficient tables of the eight models, in model order
filtered_summaries = [
    filtered_summary1, filtered_summary2, filtered_summary3, filtered_summary4,
    filtered_summary5, filtered_summary6, filtered_summary7, filtered_summary8,
]
# Reshape each table and register it under its 'Model_<i>' label
for i, summary in enumerate(filtered_summaries, start=1):
    model_label = f'Model_{i}'
    results_dict[model_label] = process_summary(summary, model_label)
# Concatenate all model results along columns (one column group per model)
final_results = pd.concat(results_dict, axis=1)
# Export to Excel. The path lives in one variable so the confirmation message
# below always names the file that was actually written.
output_file = "model_summary_table14.xlsx"
final_results.to_excel(output_file)
print(final_results)
print(f"Model summary statistics with significance have been saved to {output_file}")
Model_1 Model_2 \ Value Value Variable Metric Intercept Coef. -39.937*** -46.16*** Std.Err. (3.533) (2.443) P>|z| 0.0 0.0 lnfer Coef. 0.006 0.003 Std.Err. (0.01) (0.008) P>|z| 0.536 0.668 irrigation1 Coef. -0.016 NaN Std.Err. (0.016) NaN P>|z| 0.331 NaN irrigation12 Coef. 0.004** NaN Std.Err. (0.002) NaN P>|z| 0.044 NaN tmp Coef. 0.52*** 0.295*** Std.Err. (0.035) (0.024) P>|z| 0.0 0.0 tmp_tmp_interaction Coef. -0.013*** -0.008*** Std.Err. (0.001) (0.001) P>|z| 0.0 0.0 pre Coef. 0.227*** 0.179*** Std.Err. (0.016) (0.012) P>|z| 0.0 0.0 pre_pre_interaction Coef. -0.014*** -0.011*** Std.Err. (0.001) (0.001) P>|z| 0.0 0.0 year Coef. 0.021*** 0.025*** Std.Err. (0.002) (0.001) P>|z| 0.0 0.0 year_geoid Coef. -0.0 -0.0*** Std.Err. (0.0) (0.0) P>|z| 0.651 0.0 lnfer_irrigation1_tmp_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN lnfer_irrigation12_tmp_tmp_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN lnfer_tmp_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN lnfer_tmp_tmp_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN lnfer_irrigation1_pre_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN lnfer_irrigation12_pre_pre_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN lnfer_pre_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN lnfer_pre_pre_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN Model_3 Model_4 \ Value Value Variable Metric Intercept Coef. -40.172*** -41.59*** Std.Err. (3.525) (2.428) P>|z| 0.0 0.0 lnfer Coef. 0.011 -1.348*** Std.Err. (0.01) (0.225) P>|z| 0.262 0.0 irrigation1 Coef. 0.052 NaN Std.Err. (0.045) NaN P>|z| 0.248 NaN irrigation12 Coef. -0.006 NaN Std.Err. (0.007) NaN P>|z| 0.426 NaN tmp Coef. 0.519*** 0.161*** Std.Err. (0.035) (0.05) P>|z| 0.0 0.001 tmp_tmp_interaction Coef. -0.013*** -0.005*** Std.Err. (0.001) (0.001) P>|z| 0.0 0.0 pre Coef. 0.228*** 0.172*** Std.Err. (0.016) (0.012) P>|z| 0.0 0.0 pre_pre_interaction Coef. -0.014*** -0.011*** Std.Err. (0.001) (0.001) P>|z| 0.0 0.0 year Coef. 
0.021*** 0.024*** Std.Err. (0.002) (0.001) P>|z| 0.0 0.0 year_geoid Coef. -0.0 -0.0*** Std.Err. (0.0) (0.0) P>|z| 0.553 0.0 lnfer_irrigation1_tmp_interaction Coef. -0.001* NaN Std.Err. (0.001) NaN P>|z| 0.091 NaN lnfer_irrigation12_tmp_tmp_interaction Coef. 0.0 NaN Std.Err. (0.0) NaN P>|z| 0.166 NaN lnfer_tmp_interaction Coef. NaN 0.113*** Std.Err. NaN (0.023) P>|z| NaN 0.0 lnfer_tmp_tmp_interaction Coef. NaN -0.002*** Std.Err. NaN (0.001) P>|z| NaN 0.0 lnfer_irrigation1_pre_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN lnfer_irrigation12_pre_pre_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN lnfer_pre_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN lnfer_pre_pre_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN Model_5 Model_6 \ Value Value Variable Metric Intercept Coef. -40.255*** -44.265*** Std.Err. (3.53) (2.442) P>|z| 0.0 0.0 lnfer Coef. 0.013 -0.093** Std.Err. (0.01) (0.045) P>|z| 0.194 0.037 irrigation1 Coef. 0.052* NaN Std.Err. (0.027) NaN P>|z| 0.057 NaN irrigation12 Coef. -0.001 NaN Std.Err. (0.002) NaN P>|z| 0.756 NaN tmp Coef. 0.526*** 0.295*** Std.Err. (0.035) (0.024) P>|z| 0.0 0.0 tmp_tmp_interaction Coef. -0.013*** -0.008*** Std.Err. (0.001) (0.001) P>|z| 0.0 0.0 pre Coef. 0.231*** 0.151*** Std.Err. (0.016) (0.023) P>|z| 0.0 0.0 pre_pre_interaction Coef. -0.014*** -0.01*** Std.Err. (0.001) (0.002) P>|z| 0.0 0.0 year Coef. 0.021*** 0.025*** Std.Err. (0.002) (0.001) P>|z| 0.0 0.0 year_geoid Coef. -0.0 -0.0*** Std.Err. (0.0) (0.0) P>|z| 0.534 0.0 lnfer_irrigation1_tmp_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN lnfer_irrigation12_tmp_tmp_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN lnfer_tmp_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN lnfer_tmp_tmp_interaction Coef. NaN NaN Std.Err. NaN NaN P>|z| NaN NaN lnfer_irrigation1_pre_interaction Coef. -0.006*** NaN Std.Err. (0.002) NaN P>|z| 0.002 NaN lnfer_irrigation12_pre_pre_interaction Coef. 0.0** NaN Std.Err. 
(0.0) NaN P>|z| 0.035 NaN lnfer_pre_interaction Coef. NaN 0.024* Std.Err. NaN (0.013) P>|z| NaN 0.062 lnfer_pre_pre_interaction Coef. NaN -0.001 Std.Err. NaN (0.001) P>|z| NaN 0.161 Model_7 Model_8 Value Value Variable Metric Intercept Coef. -39.998*** -40.028*** Std.Err. (3.517) (2.438) P>|z| 0.0 0.0 lnfer Coef. 0.011 -1.357*** Std.Err. (0.01) (0.231) P>|z| 0.258 0.0 irrigation1 Coef. 0.026 NaN Std.Err. (0.044) NaN P>|z| 0.557 NaN irrigation12 Coef. -0.002 NaN Std.Err. (0.007) NaN P>|z| 0.712 NaN tmp Coef. 0.538*** 0.178*** Std.Err. (0.035) (0.05) P>|z| 0.0 0.0 tmp_tmp_interaction Coef. -0.014*** -0.005*** Std.Err. (0.001) (0.001) P>|z| 0.0 0.0 pre Coef. 0.233*** 0.138*** Std.Err. (0.016) (0.022) P>|z| 0.0 0.0 pre_pre_interaction Coef. -0.014*** -0.009*** Std.Err. (0.001) (0.002) P>|z| 0.0 0.0 year Coef. 0.021*** 0.023*** Std.Err. (0.002) (0.001) P>|z| 0.0 0.0 year_geoid Coef. -0.0 -0.0*** Std.Err. (0.0) (0.0) P>|z| 0.602 0.0 lnfer_irrigation1_tmp_interaction Coef. 0.002 NaN Std.Err. (0.001) NaN P>|z| 0.103 NaN lnfer_irrigation12_tmp_tmp_interaction Coef. -0.0 NaN Std.Err. (0.0) NaN P>|z| 0.702 NaN lnfer_tmp_interaction Coef. NaN 0.104*** Std.Err. NaN (0.022) P>|z| NaN 0.0 lnfer_tmp_tmp_interaction Coef. NaN -0.002*** Std.Err. NaN (0.001) P>|z| NaN 0.0 lnfer_irrigation1_pre_interaction Coef. -0.013*** NaN Std.Err. (0.004) NaN P>|z| 0.001 NaN lnfer_irrigation12_pre_pre_interaction Coef. 0.0*** NaN Std.Err. (0.0) NaN P>|z| 0.003 NaN lnfer_pre_interaction Coef. NaN 0.028** Std.Err. NaN (0.013) P>|z| NaN 0.027 lnfer_pre_pre_interaction Coef. NaN -0.002* Std.Err. NaN (0.001) P>|z| NaN 0.073 Model summary statistics with significance have been saved to model_summary_statistics_with_significance.xlsx