In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.pyplot as plt
import netCDF4 as nc
import pandas as pd
import os
import csv
from glob import glob
import xarray as xr
import matplotlib.ticker as ticker 

import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec

import re
from datetime import datetime

from linearmodels.panel import PanelOLS
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed).
  from pandas.core import (
In [2]:
# Step 1: Data Preparation
# Switch the process working directory to the regression folder so that the
# relative data path used by the loading cell ("./soybean_reg_data.csv") resolves.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs unmodified where this directory exists.
os.chdir('/Users/chenchenren/postdoc/paper/2N and water-US/regression/')
In [3]:
# Path to the input data, relative to the current working directory.
# NOTE: despite the variable name, the file is a CSV, not an Excel workbook.
excel_file = "./soybean_reg_data.csv"
# Read the CSV into a DataFrame (pd.read_csv; no sheet selection is involved)
ds = pd.read_csv(excel_file)
# df_soybean is a second name bound to the same DataFrame object as ds (no copy is made)
df_soybean = ds
# Print the frame as plain text; the recorded output shows the columns
# geoid, year, tmp, pre, irrigation1, lnyield, lnfer, group, zone.
print(df_soybean)
         geoid    year        tmp       pre  irrigation1   lnyield     lnfer  \
0       1001.0  2009.0  24.140660  8.808839     0.718851  7.609614  2.022537   
1       1001.0  2010.0  25.675484  5.438990     0.830087  7.035731  1.910005   
2       1001.0  2011.0  24.544823  5.270162     0.595086  7.466514  2.025641   
3       1001.0  2013.0  24.036592  7.514143     0.001241  7.926855  2.906756   
4       1003.0  2008.0  24.727230  8.675528     0.001980  7.934111  1.240602   
...        ...     ...        ...       ...          ...       ...       ...   
17577  55141.0  2016.0  17.486904  7.304833     0.000000  8.134343  2.860158   
17578  55141.0  2017.0  16.590237  6.467205     0.000000  7.978877  2.633171   
17579  55141.0  2018.0  17.266659  8.373576     0.000000  7.838077  2.639034   
17580  55141.0  2019.0  16.238592  8.735889     0.000000  7.912185  2.671287   
17581  55141.0  2020.0  16.202635  6.832954     0.000000  8.173033  2.625827   

       group  zone  
0          1   5.0  
1          1   5.0  
2          1   5.0  
3          1   5.0  
4          1   5.0  
...      ...   ...  
17577      2   1.0  
17578      2   1.0  
17579      2   1.0  
17580      2   1.0  
17581      2   1.0  

[17582 rows x 9 columns]

Table S12

In [4]:
#model 1
# OLS of lnyield on lnfer, irrigation1 (linear + squared), tmp (linear + squared),
# pre (linear + squared) and a linear year term, with one dummy per geoid
# (C(geoid)), fitted on the group == 1 subset. Standard errors are clustered by geoid.

# Take an explicit copy of the filtered rows: the derived columns below are then
# written to an independent frame instead of a slice of df_soybean, which is what
# raised the SettingWithCopyWarning on every assignment.
data = df_soybean[df_soybean['group'] == 1].copy()

# Prepare the squared and interaction terms.
# (The lnfer_* interaction columns are created here but are not part of this
# cell's formula; they are kept so `data` has the same columns as before.)
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']

data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']

# Define the formula for the regression model
formula = 'lnyield ~ lnfer + irrigation1 + irrigation12 + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +year + C(geoid)'

# Run the regression using the formula interface with standard errors clustered by geoid
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})

# Extract the coefficient table
summary = model.summary2().tables[1]

# Drop the rows belonging to the geoid dummies so only the substantive regressors are shown
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary1 = summary.loc[~summary.index.str.contains(pattern)]

# Baseline model containing only the geoid dummies
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()

# Residual sum of squares of the dummies-only model: the variation in lnyield
# left after removing geoid means, used as the "total" for the within R-squared
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)

# Residual sum of squares of the main model
rss_within = np.sum((model.resid) ** 2)

# Within R-squared: share of the within-geoid variation explained by the regressors
within_r2 = 1 - (rss_within / tss_within)

# Hand-built header mimicking the statsmodels summary, with the within R-squared added.
# NOTE(review): in the recorded run statsmodels warned "covariance of constraints
# does not have full rank" and the F-statistic printed below came out negative,
# so model.fvalue / model.f_pvalue are not meaningful for this specification.
ols_info = f"""
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       {model.rsquared:.3f}
Model:                            OLS   Adj. R-squared:                  {model.rsquared_adj:.3f}
Within R-squared:                 {within_r2:.3f}
Method:                 Least Squares   F-statistic:                     {model.fvalue:.1f}
Date:                {datetime.now():%a, %d %b %Y}   Prob (F-statistic):               {model.f_pvalue:.2e}
Time:                        {datetime.now():%H:%M:%S}   Log-Likelihood:                 {model.llf:.2f}
No. Observations:                {model.nobs:.0f}   AIC:                            {model.aic:.1f}
Df Residuals:                    {model.df_resid:.0f}   BIC:                             {model.bic:.1f}
Df Model:                         {model.df_model:.0f}                                         
Covariance Type:               cluster                             
==============================================================================
"""

# Combine the header with the filtered coefficient table
final_summary1 = ols_info + "\n" + filtered_summary1.to_string()

# Print the final summary
print(final_summary1)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1287681286.py:6: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['pre_pre_interaction'] = data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1287681286.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['irrigation12'] = data['irrigation1'] * data['irrigation1']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1287681286.py:8: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1287681286.py:9: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1287681286.py:11: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1287681286.py:12: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1287681286.py:13: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1257, but rank is 8
  warnings.warn('covariance of constraints does not have full '
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       0.737
Model:                            OLS   Adj. R-squared:                  0.684
Within R-squared:                 0.315
Method:                 Least Squares   F-statistic:                     -15082669839380.7
Date:                Fri, 15 Nov 2024   Prob (F-statistic):               1.00e+00
Time:                        11:18:00   Log-Likelihood:                 3532.10
No. Observations:                7507   AIC:                            -4548.2
Df Residuals:                    6249   BIC:                             4161.7
Df Model:                         1257                                         
Covariance Type:               cluster                             
==============================================================================

                         Coef.  Std.Err.          z          P>|z|     [0.025     0.975]
Intercept           -38.522131  1.519082 -25.358830  7.181326e-142 -41.499476 -35.544786
lnfer                 0.006019  0.010023   0.600552   5.481382e-01  -0.013625   0.025664
irrigation1          -0.015905  0.016186  -0.982618   3.257956e-01  -0.047630   0.015820
irrigation12          0.003699  0.001833   2.017882   4.360359e-02   0.000106   0.007293
tmp                   0.521217  0.035106  14.847137   7.260372e-50   0.452411   0.590023
tmp_tmp_interaction  -0.013340  0.000843 -15.832945   1.843839e-56  -0.014991  -0.011688
pre                   0.227345  0.016284  13.961642   2.672075e-44   0.195430   0.259260
pre_pre_interaction  -0.014297  0.001160 -12.326442   6.526549e-35  -0.016570  -0.012023
year                  0.020107  0.000754  26.675887  8.965906e-157   0.018629   0.021584
In [5]:
#model 2
# OLS of lnyield on lnfer, tmp (linear + squared), pre (linear + squared) and a
# linear year term, with one dummy per geoid (C(geoid)), fitted on the
# group == 2 subset. Standard errors are clustered by geoid. The within
# R-squared is taken from an equivalent PanelOLS fit with entity effects.

# Take an explicit copy of the filtered rows so the column assignments below
# write to an independent frame (assigning into the slice raised
# SettingWithCopyWarning on every line).
data = df_soybean[df_soybean['group'] == 2].copy()

# Prepare the squared and interaction terms.
# (The lnfer_* interaction columns are created here but are not part of this
# cell's formulas; they are kept so `data` has the same columns as before.)
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']

data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']

# Define the formula for the regression model
formula = 'lnyield ~ lnfer  + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +year+ C(geoid)'

# Run the regression using the formula interface with standard errors clustered by geoid
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})

# Extract the coefficient table
summary = model.summary2().tables[1]

# Drop the rows belonging to the geoid dummies so only the substantive regressors are shown
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary2 = summary.loc[~summary.index.str.contains(pattern)]

# 'year' is moved into the panel index below, so keep a regular-column copy of
# it to serve as the trend regressor in the PanelOLS formula.
data['trend'] = data['year']
# Panel-indexed frame (entity = geoid, time = year) for linearmodels. Previously
# data1 was an alias of data, so the 'trend' assignment went through the alias;
# the resulting contents of data and data1 are the same as before.
data1 = data.set_index(['geoid', 'year'])
# Define the formula for the fixed effects regression model with PanelOLS
formula_fe = (
    'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +trend+ EntityEffects '
)

# Fit the fixed effects model using PanelOLS; only rsquared_within is read from
# this fit, the reported coefficients and standard errors come from `model`.
fe_model = PanelOLS.from_formula(formula_fe, data=data1)
fe_results = fe_model.fit(cov_type='robust')


# Hand-built header mimicking the statsmodels summary, with the within R-squared added.
# NOTE(review): in the recorded run statsmodels warned "covariance of constraints
# does not have full rank" and the F-statistic printed below came out negative,
# so model.fvalue / model.f_pvalue are not meaningful for this specification.
ols_info = f"""
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       {model.rsquared:.3f}
Model:                            OLS   Adj. R-squared:                  {model.rsquared_adj:.3f}
Within R-squared:                 {fe_results.rsquared_within:.3f}
Method:                 Least Squares   F-statistic:                     {model.fvalue:.1f}
Date:                {datetime.now():%a, %d %b %Y}   Prob (F-statistic):               {model.f_pvalue:.2e}
Time:                        {datetime.now():%H:%M:%S}   Log-Likelihood:                 {model.llf:.2f}
No. Observations:                {model.nobs:.0f}   AIC:                            {model.aic:.1f}
Df Residuals:                    {model.df_resid:.0f}   BIC:                             {model.bic:.1f}
Df Model:                         {model.df_model:.0f}                                         
Covariance Type:               cluster                             
==============================================================================
"""

# Combine the header with the filtered coefficient table
final_summary2 = ols_info + "\n" + filtered_summary2.to_string()

# Print the final summary
print(final_summary2)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2519090951.py:6: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['pre_pre_interaction'] = data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2519090951.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2519090951.py:8: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2519090951.py:10: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2519090951.py:11: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2519090951.py:12: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1584, but rank is 6
  warnings.warn('covariance of constraints does not have full '
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2519090951.py:29: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1['trend']=data1['year']
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       0.741
Model:                            OLS   Adj. R-squared:                  0.692
Within R-squared:                 0.319
Method:                 Least Squares   F-statistic:                     -4862979059989.5
Date:                Fri, 15 Nov 2024   Prob (F-statistic):               1.00e+00
Time:                        11:18:07   Log-Likelihood:                 5641.18
No. Observations:                10075   AIC:                            -8112.4
Df Residuals:                    8490   BIC:                             3327.9
Df Model:                         1584                                         
Covariance Type:               cluster                             
==============================================================================

                         Coef.  Std.Err.          z          P>|z|     [0.025     0.975]
Intercept           -33.001536  1.073918 -30.730047  2.259444e-207 -35.106376 -30.896697
lnfer                 0.002571  0.007563   0.340010   7.338487e-01  -0.012251   0.017394
tmp                   0.286666  0.023890  11.999453   3.576503e-33   0.239842   0.333489
tmp_tmp_interaction  -0.007424  0.000623 -11.923510   8.926738e-33  -0.008644  -0.006203
pre                   0.180895  0.012406  14.581525   3.681989e-48   0.156580   0.205210
pre_pre_interaction  -0.011249  0.000888 -12.667173   8.989585e-37  -0.012990  -0.009509
year                  0.018752  0.000539  34.807651  1.863234e-265   0.017696   0.019808
In [6]:
#model 3
# Same specification as the model 1 cell (group == 1, geoid dummies, SEs clustered
# by geoid) with two extra regressors: lnfer*irrigation1*tmp and
# lnfer*irrigation1^2*tmp^2.

# Filter data for irrigated
# An explicit copy is taken so the derived columns below are written to an
# independent frame instead of a slice of df_soybean, which is what raised the
# SettingWithCopyWarning on every assignment.
data = df_soybean[df_soybean['group'] == 1].copy()

# Prepare the squared and interaction terms.
# (The lnfer_*_pre_* interaction columns are created here but are not part of
# this cell's formula; they are kept so `data` has the same columns as before.)
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']

data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']

# Define the formula for the regression model
formula = 'lnyield ~ lnfer + irrigation1 + irrigation12 + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +year+lnfer_irrigation1_tmp_interaction+ lnfer_irrigation12_tmp_tmp_interaction+ C(geoid)'

# Run the regression using the formula interface with standard errors clustered by geoid
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})

# Extract the coefficient table
summary = model.summary2().tables[1]

# Drop the rows belonging to the geoid dummies so only the substantive regressors are shown
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary3 = summary.loc[~summary.index.str.contains(pattern)]

# Baseline model containing only the geoid dummies
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()

# Residual sum of squares of the dummies-only model: the variation in lnyield
# left after removing geoid means, used as the "total" for the within R-squared
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)

# Residual sum of squares of the main model
rss_within = np.sum((model.resid) ** 2)

# Within R-squared: share of the within-geoid variation explained by the regressors
within_r2 = 1 - (rss_within / tss_within)

# Hand-built header mimicking the statsmodels summary, with the within R-squared added.
# NOTE(review): in the recorded run statsmodels warned "covariance of constraints
# does not have full rank" (1259 constraints, rank 10) and the F-statistic printed
# was ~8.8e12, so model.fvalue / model.f_pvalue should not be relied on here.
ols_info = f"""
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       {model.rsquared:.3f}
Model:                            OLS   Adj. R-squared:                  {model.rsquared_adj:.3f}
Within R-squared:                 {within_r2:.3f}
Method:                 Least Squares   F-statistic:                     {model.fvalue:.1f}
Date:                {datetime.now():%a, %d %b %Y}   Prob (F-statistic):               {model.f_pvalue:.2e}
Time:                        {datetime.now():%H:%M:%S}   Log-Likelihood:                 {model.llf:.2f}
No. Observations:                {model.nobs:.0f}   AIC:                            {model.aic:.1f}
Df Residuals:                    {model.df_resid:.0f}   BIC:                             {model.bic:.1f}
Df Model:                         {model.df_model:.0f}                                         
Covariance Type:               cluster                             
==============================================================================
"""

# Combine the header with the filtered coefficient table
final_summary3 = ols_info + "\n" + filtered_summary3.to_string()

# Print the final summary
print(final_summary3)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/748763980.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['pre_pre_interaction'] = data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/748763980.py:8: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['irrigation12'] = data['irrigation1'] * data['irrigation1']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/748763980.py:9: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/748763980.py:10: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/748763980.py:12: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/748763980.py:13: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/748763980.py:14: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1259, but rank is 10
  warnings.warn('covariance of constraints does not have full '
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       0.737
Model:                            OLS   Adj. R-squared:                  0.684
Within R-squared:                 0.316
Method:                 Least Squares   F-statistic:                     8776751685601.0
Date:                Fri, 15 Nov 2024   Prob (F-statistic):               0.00e+00
Time:                        11:18:13   Log-Likelihood:                 3535.85
No. Observations:                7507   AIC:                            -4551.7
Df Residuals:                    6247   BIC:                             4172.0
Df Model:                         1259                                         
Covariance Type:               cluster                             
==============================================================================

                                            Coef.  Std.Err.          z          P>|z|     [0.025     0.975]
Intercept                              -38.321133  1.510202 -25.374835  4.782053e-142 -41.281075 -35.361191
lnfer                                    0.010810  0.009956   1.085828   2.775552e-01  -0.008703   0.030323
irrigation1                              0.050252  0.045064   1.115135   2.647928e-01  -0.038071   0.138576
irrigation12                            -0.005374  0.007057  -0.761398   4.464196e-01  -0.019206   0.008459
tmp                                      0.519718  0.034878  14.901235   3.235414e-50   0.451359   0.588077
tmp_tmp_interaction                     -0.013297  0.000836 -15.901556   6.181022e-57  -0.014936  -0.011658
pre                                      0.228471  0.016425  13.910163   5.495409e-44   0.196279   0.260663
pre_pre_interaction                     -0.014383  0.001171 -12.278522   1.181363e-34  -0.016678  -0.012087
year                                     0.020004  0.000750  26.659966  1.371681e-156   0.018534   0.021475
lnfer_irrigation1_tmp_interaction       -0.001265  0.000770  -1.642240   1.005404e-01  -0.002775   0.000245
lnfer_irrigation12_tmp_tmp_interaction   0.000007  0.000005   1.344235   1.788725e-01  -0.000003   0.000017
In [7]:
#model 4
# Same specification as the model 2 cell (group == 2, geoid dummies, SEs clustered
# by geoid) with two extra regressors: lnfer*tmp and lnfer*tmp^2. The within
# R-squared is taken from an equivalent PanelOLS fit with entity effects.

# Filter data for rainfed
# An explicit copy is taken so the column assignments below write to an
# independent frame (assigning into the slice raised SettingWithCopyWarning
# on every line).
data = df_soybean[df_soybean['group'] == 2].copy()

# Prepare the squared and interaction terms.
# (The lnfer_pre_* interaction columns are created here but are not part of
# this cell's formulas; they are kept so `data` has the same columns as before.)
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']

data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']

# Define the formula for the regression model
formula = 'lnyield ~ lnfer  + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +year+lnfer_tmp_interaction+ lnfer_tmp_tmp_interaction + C(geoid)'

# Run the regression using the formula interface with standard errors clustered by geoid
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})

# Extract the coefficient table
summary = model.summary2().tables[1]

# Drop the rows belonging to the geoid dummies so only the substantive regressors are shown
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary4 = summary.loc[~summary.index.str.contains(pattern)]

# 'year' is moved into the panel index below, so keep a regular-column copy of
# it to serve as the trend regressor in the PanelOLS formula.
data['trend'] = data['year']
# Panel-indexed frame (entity = geoid, time = year) for linearmodels. Previously
# data1 was an alias of data, so the 'trend' assignment went through the alias;
# the resulting contents of data and data1 are the same as before.
data1 = data.set_index(['geoid', 'year'])
# Define the formula for the fixed effects regression model with PanelOLS
formula_fe = (
    'lnyield ~ lnfer  + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +lnfer_tmp_interaction+ lnfer_tmp_tmp_interaction +trend+ EntityEffects '
)
# Fit the fixed effects model using PanelOLS; only rsquared_within is read from
# this fit, the reported coefficients and standard errors come from `model`.
fe_model = PanelOLS.from_formula(formula_fe, data=data1)
fe_results = fe_model.fit(cov_type='robust')

# Hand-built header mimicking the statsmodels summary, with the within R-squared added.
# NOTE(review): the other model cells in this notebook printed nonsensical
# F-statistics after a statsmodels rank-deficiency ValueWarning; presumably the
# same applies to model.fvalue / model.f_pvalue here - confirm before reporting them.
ols_info = f"""
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       {model.rsquared:.3f}
Model:                            OLS   Adj. R-squared:                  {model.rsquared_adj:.3f}
Within R-squared:                 {fe_results.rsquared_within:.3f}
Method:                 Least Squares   F-statistic:                     {model.fvalue:.1f}
Date:                {datetime.now():%a, %d %b %Y}   Prob (F-statistic):               {model.f_pvalue:.2e}
Time:                        {datetime.now():%H:%M:%S}   Log-Likelihood:                 {model.llf:.2f}
No. Observations:                {model.nobs:.0f}   AIC:                            {model.aic:.1f}
Df Residuals:                    {model.df_resid:.0f}   BIC:                             {model.bic:.1f}
Df Model:                         {model.df_model:.0f}                                         
Covariance Type:               cluster                             
==============================================================================
"""

# Combine the header with the filtered coefficient table
final_summary4 = ols_info + "\n" + filtered_summary4.to_string()

# Print the final summary
print(final_summary4)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/942356029.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['pre_pre_interaction'] = data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/942356029.py:8: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/942356029.py:9: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/942356029.py:11: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/942356029.py:12: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/942356029.py:13: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       0.747
Model:                            OLS   Adj. R-squared:                  0.700
Within R-squared:                 0.336
Method:                 Least Squares   F-statistic:                     -1325714146266.6
Date:                Fri, 15 Nov 2024   Prob (F-statistic):               1.00e+00
Time:                        11:18:20   Log-Likelihood:                 5766.72
No. Observations:                10075   AIC:                            -8359.4
Df Residuals:                    8488   BIC:                             3095.2
Df Model:                         1586                                         
Covariance Type:               cluster                             
==============================================================================

                               Coef.  Std.Err.          z          P>|z|     [0.025     0.975]
Intercept                 -31.877339  1.130215 -28.204664  5.126040e-175 -34.092520 -29.662158
lnfer                      -1.439880  0.225001  -6.399437   1.559512e-10  -1.880874  -0.998886
tmp                         0.141788  0.050195   2.824753   4.731705e-03   0.043408   0.240168
tmp_tmp_interaction        -0.004595  0.001308  -3.513934   4.415227e-04  -0.007157  -0.002032
pre                         0.173106  0.012165  14.230210   5.950317e-46   0.149264   0.196948
pre_pre_interaction        -0.010701  0.000873 -12.251247   1.654332e-34  -0.012413  -0.008989
year                        0.019085  0.000544  35.094109  8.290495e-270   0.018019   0.020151
lnfer_tmp_interaction       0.122043  0.022729   5.369542   7.893673e-08   0.077496   0.166591
lnfer_tmp_tmp_interaction  -0.002460  0.000568  -4.334601   1.460244e-05  -0.003573  -0.001348
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1586, but rank is 8
  warnings.warn('covariance of constraints does not have full '
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/942356029.py:29: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1['trend']=data1['year']
In [8]:
#model 5
# County fixed-effects regression of lnyield for the group == 1 subset, with
# fertilizer x irrigation x precipitation interactions and standard errors
# clustered by geoid. Prints a hand-formatted summary (final_summary5) that
# hides the county dummy rows.

# Filter data for irrigated.
# .copy() makes `data` an independent frame: assigning new columns onto a
# boolean-mask slice of df_soybean is what raised SettingWithCopyWarning.
data = df_soybean[df_soybean['group'] == 1].copy()

# Prepare the interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']

# The temperature interactions with lnfer are built here but are not part of
# this model's formula below.
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']

# Define the formula for the regression model; C(geoid) adds one dummy per county
formula = 'lnyield ~ lnfer + irrigation1 + irrigation12 + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +year +lnfer_irrigation1_pre_interaction+ lnfer_irrigation12_pre_pre_interaction+  C(geoid)'

# Run the regression using the formula interface with clustered standard errors
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})

# Extract the coefficient table
summary = model.summary2().tables[1]

# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary5 = summary.loc[~summary.index.str.contains(pattern)]

# Baseline model containing only the county dummies; its residual sum of
# squares is the variation left after removing county means.
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()

# Calculate the total sum of squares (TSS) using the model with only fixed effects
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)

# Calculate the residual sum of squares (RSS) from the main model
rss_within = np.sum((model.resid) ** 2)

# Calculate the within R-squared
within_r2 = 1 - (rss_within / tss_within)

# Format the OLS summary with the within R-squared.
# NOTE(review): statsmodels emits "covariance of constraints does not have full
# rank" for this fit, so model.fvalue / model.f_pvalue printed below should not
# be interpreted — confirm before reporting them.
ols_info = f"""
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       {model.rsquared:.3f}
Model:                            OLS   Adj. R-squared:                  {model.rsquared_adj:.3f}
Within R-squared:                 {within_r2:.3f}
Method:                 Least Squares   F-statistic:                     {model.fvalue:.1f}
Date:                {datetime.now():%a, %d %b %Y}   Prob (F-statistic):               {model.f_pvalue:.2e}
Time:                        {datetime.now():%H:%M:%S}   Log-Likelihood:                 {model.llf:.2f}
No. Observations:                {model.nobs:.0f}   AIC:                            {model.aic:.1f}
Df Residuals:                    {model.df_resid:.0f}   BIC:                             {model.bic:.1f}
Df Model:                         {model.df_model:.0f}                                         
Covariance Type:               cluster                             
==============================================================================
"""

# Combine the OLS info with the filtered summary
final_summary5 = ols_info + "\n" + filtered_summary5.to_string()

# Print the final summary
print(final_summary5)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1023417680.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['pre_pre_interaction'] = data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1023417680.py:8: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['irrigation12'] = data['irrigation1'] * data['irrigation1']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1023417680.py:9: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1023417680.py:10: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1023417680.py:12: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1023417680.py:13: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1023417680.py:14: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1259, but rank is 10
  warnings.warn('covariance of constraints does not have full '
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       0.738
Model:                            OLS   Adj. R-squared:                  0.685
Within R-squared:                 0.317
Method:                 Least Squares   F-statistic:                     13218053090066.6
Date:                Fri, 15 Nov 2024   Prob (F-statistic):               0.00e+00
Time:                        11:18:26   Log-Likelihood:                 3545.73
No. Observations:                7507   AIC:                            -4571.5
Df Residuals:                    6247   BIC:                             4152.3
Df Model:                         1259                                         
Covariance Type:               cluster                             
==============================================================================

                                            Coef.  Std.Err.          z          P>|z|     [0.025     0.975]
Intercept                              -38.308661  1.504098 -25.469528  4.290598e-143 -41.256639 -35.360684
lnfer                                    0.012705  0.010011   1.269066   2.044177e-01  -0.006917   0.032326
irrigation1                              0.051396  0.027284   1.883720   5.960288e-02  -0.002080   0.104872
irrigation12                            -0.000650  0.002281  -0.284937   7.756924e-01  -0.005122   0.003821
tmp                                      0.527072  0.035088  15.021570   5.303620e-51   0.458301   0.595842
tmp_tmp_interaction                     -0.013475  0.000842 -16.006810   1.145340e-57  -0.015125  -0.011825
pre                                      0.231881  0.016433  14.110700   3.263211e-45   0.199673   0.264089
pre_pre_interaction                     -0.014535  0.001168 -12.440248   1.580124e-35  -0.016825  -0.012245
year                                     0.019955  0.000744  26.809175  2.525887e-158   0.018497   0.021414
lnfer_irrigation1_pre_interaction       -0.006095  0.001979  -3.080610   2.065769e-03  -0.009973  -0.002217
lnfer_irrigation12_pre_pre_interaction   0.000070  0.000033   2.084066   3.715416e-02   0.000004   0.000135
In [9]:
#model 6
# County fixed-effects regression of lnyield for the group == 2 subset, with
# fertilizer x precipitation interactions and standard errors clustered by
# geoid. The within R-squared is taken from an equivalent PanelOLS fit with
# entity effects. Prints a hand-formatted summary (final_summary6) that hides
# the county dummy rows.

# Filter data for rainfed.
# .copy() makes `data` an independent frame: assigning new columns onto a
# boolean-mask slice of df_soybean is what raised SettingWithCopyWarning.
data = df_soybean[df_soybean['group'] == 2].copy()

# Prepare the interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']

# The temperature interactions with lnfer are built here but are not part of
# this model's formula below.
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']

# Define the formula for the regression model; C(geoid) adds one dummy per county
formula = 'lnyield ~ lnfer  + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +year+lnfer_pre_interaction+ lnfer_pre_pre_interaction + C(geoid)'

# Run the regression using the formula interface with clustered standard errors
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})

# Extract the coefficient table
summary = model.summary2().tables[1]

# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary6 = summary.loc[~summary.index.str.contains(pattern)]

# Panel-indexed frame for PanelOLS. `year` becomes part of the index, so it is
# duplicated into `trend` first to stay available as a regressor. assign()
# returns a new frame, so `data` is no longer mutated through an alias.
data1 = data.assign(trend=data['year']).set_index(['geoid', 'year'])
# Define the formula for the fixed effects regression model with PanelOLS
formula_fe = (
    'lnyield ~ lnfer  + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +lnfer_pre_interaction+ lnfer_pre_pre_interaction +trend+ EntityEffects '
)
# Fit the fixed effects model using PanelOLS; only rsquared_within is read from
# this fit, the coefficients and standard errors reported come from `model`.
fe_model = PanelOLS.from_formula(formula_fe, data=data1)
fe_results = fe_model.fit(cov_type='robust')


# Format the OLS summary with the within R-squared.
# NOTE(review): statsmodels emits "covariance of constraints does not have full
# rank" for this fit and the printed F-statistic comes out negative, so
# model.fvalue / model.f_pvalue below should not be interpreted.
ols_info = f"""
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       {model.rsquared:.3f}
Model:                            OLS   Adj. R-squared:                  {model.rsquared_adj:.3f}
Within R-squared:                {fe_results.rsquared_within:.3f}
Method:                 Least Squares   F-statistic:                     {model.fvalue:.1f}
Date:                {datetime.now():%a, %d %b %Y}   Prob (F-statistic):               {model.f_pvalue:.2e}
Time:                        {datetime.now():%H:%M:%S}   Log-Likelihood:                 {model.llf:.2f}
No. Observations:                {model.nobs:.0f}   AIC:                            {model.aic:.1f}
Df Residuals:                    {model.df_resid:.0f}   BIC:                             {model.bic:.1f}
Df Model:                         {model.df_model:.0f}                                         
Covariance Type:               cluster                             
==============================================================================
"""

# Combine the OLS info with the filtered summary
final_summary6 = ols_info + "\n" + filtered_summary6.to_string()

# Print the final summary
print(final_summary6)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1781115856.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['pre_pre_interaction'] = data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1781115856.py:8: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1781115856.py:9: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1781115856.py:11: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1781115856.py:12: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1781115856.py:13: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       0.742
Model:                            OLS   Adj. R-squared:                  0.694
Within R-squared:                0.323
Method:                 Least Squares   F-statistic:                     -3726253873595.7
Date:                Fri, 15 Nov 2024   Prob (F-statistic):               1.00e+00
Time:                        11:18:32   Log-Likelihood:                 5668.72
No. Observations:                10075   AIC:                            -8163.4
Df Residuals:                    8488   BIC:                             3291.2
Df Model:                         1586                                         
Covariance Type:               cluster                             
==============================================================================

                               Coef.  Std.Err.          z          P>|z|     [0.025     0.975]
Intercept                 -32.265893  1.081751 -29.827472  1.720670e-195 -34.386086 -30.145701
lnfer                      -0.098089  0.044472  -2.205649   2.740859e-02  -0.185252  -0.010926
tmp                         0.287700  0.024073  11.950925   6.420654e-33   0.240517   0.334883
tmp_tmp_interaction        -0.007440  0.000627 -11.871190   1.670733e-32  -0.008668  -0.006211
pre                         0.151558  0.022613   6.702369   2.050670e-11   0.107238   0.195878
pre_pre_interaction        -0.009969  0.001658  -6.013411   1.816593e-09  -0.013218  -0.006720
year                        0.018450  0.000541  34.116538  4.194062e-255   0.017390   0.019510
lnfer_pre_interaction       0.024954  0.012817   1.946900   5.154671e-02  -0.000167   0.050075
lnfer_pre_pre_interaction  -0.001287  0.000892  -1.443681   1.488287e-01  -0.003035   0.000460
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1586, but rank is 8
  warnings.warn('covariance of constraints does not have full '
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1781115856.py:30: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1['trend']=data1['year']
In [10]:
#model 7
# County fixed-effects regression of lnyield for the group == 1 subset, with
# fertilizer x irrigation interactions against both temperature and
# precipitation, and standard errors clustered by geoid. Prints a
# hand-formatted summary (final_summary7) that hides the county dummy rows.

# Filter data for irrigated.
# .copy() makes `data` an independent frame: assigning new columns onto a
# boolean-mask slice of df_soybean is what raised SettingWithCopyWarning.
data = df_soybean[df_soybean['group'] == 1].copy()

# Prepare the interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']

data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']

# Define the formula for the regression model; C(geoid) adds one dummy per county
formula = 'lnyield ~ lnfer + irrigation1 + irrigation12 + tmp + tmp_tmp_interaction +year+ lnfer_irrigation1_tmp_interaction+ lnfer_irrigation12_tmp_tmp_interaction+lnfer_irrigation1_pre_interaction+ lnfer_irrigation12_pre_pre_interaction+ pre + pre_pre_interaction + C(geoid)'

# Run the regression using the formula interface with clustered standard errors
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})

# Extract the coefficient table
summary = model.summary2().tables[1]

# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary7 = summary.loc[~summary.index.str.contains(pattern)]

# Baseline model containing only the county dummies; its residual sum of
# squares is the variation left after removing county means.
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()

# Calculate the total sum of squares (TSS) using the model with only fixed effects
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)

# Calculate the residual sum of squares (RSS) from the main model
rss_within = np.sum((model.resid) ** 2)

# Calculate the within R-squared
within_r2 = 1 - (rss_within / tss_within)

# Format the OLS summary with the within R-squared.
# NOTE(review): statsmodels emits "covariance of constraints does not have full
# rank" for this fit, so model.fvalue / model.f_pvalue printed below should not
# be interpreted — confirm before reporting them.
ols_info = f"""
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       {model.rsquared:.3f}
Model:                            OLS   Adj. R-squared:                  {model.rsquared_adj:.3f}
Within R-squared:                 {within_r2:.3f}
Method:                 Least Squares   F-statistic:                     {model.fvalue:.1f}
Date:                {datetime.now():%a, %d %b %Y}   Prob (F-statistic):               {model.f_pvalue:.2e}
Time:                        {datetime.now():%H:%M:%S}   Log-Likelihood:                 {model.llf:.2f}
No. Observations:                {model.nobs:.0f}   AIC:                            {model.aic:.1f}
Df Residuals:                    {model.df_resid:.0f}   BIC:                             {model.bic:.1f}
Df Model:                         {model.df_model:.0f}                                         
Covariance Type:               cluster                             
==============================================================================
"""

# Combine the OLS info with the filtered summary
final_summary7 = ols_info + "\n" + filtered_summary7.to_string()

# Print the final summary
print(final_summary7)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1137327791.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['pre_pre_interaction'] = data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1137327791.py:8: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['irrigation12'] = data['irrigation1'] * data['irrigation1']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1137327791.py:9: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1137327791.py:10: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1137327791.py:12: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1137327791.py:13: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1137327791.py:14: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1261, but rank is 12
  warnings.warn('covariance of constraints does not have full '
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       0.739
Model:                            OLS   Adj. R-squared:                  0.686
Within R-squared:                 0.319
Method:                 Least Squares   F-statistic:                     6558362820053.0
Date:                Fri, 15 Nov 2024   Prob (F-statistic):               0.00e+00
Time:                        11:18:37   Log-Likelihood:                 3554.06
No. Observations:                7507   AIC:                            -4584.1
Df Residuals:                    6245   BIC:                             4153.5
Df Model:                         1261                                         
Covariance Type:               cluster                             
==============================================================================

                                            Coef.  Std.Err.          z          P>|z|     [0.025     0.975]
Intercept                              -38.381111  1.506789 -25.472124  4.015698e-143 -41.334363 -35.427859
lnfer                                    0.010936  0.009941   1.100146   2.712687e-01  -0.008547   0.030420
irrigation1                              0.024494  0.044406   0.551591   5.812284e-01  -0.062540   0.111528
irrigation12                            -0.002255  0.006684  -0.337387   7.358255e-01  -0.015357   0.010846
tmp                                      0.538854  0.035216  15.301419   7.480764e-53   0.469832   0.607876
tmp_tmp_interaction                     -0.013766  0.000845 -16.292207   1.121090e-59  -0.015422  -0.012110
year                                     0.019934  0.000743  26.820682  1.854482e-158   0.018477   0.021391
lnfer_irrigation1_tmp_interaction        0.002105  0.001276   1.649260   9.909437e-02  -0.000397   0.004606
lnfer_irrigation12_tmp_tmp_interaction  -0.000002  0.000006  -0.409528   6.821521e-01  -0.000013   0.000009
lnfer_irrigation1_pre_interaction       -0.012644  0.003686  -3.430637   6.021650e-04  -0.019868  -0.005420
lnfer_irrigation12_pre_pre_interaction   0.000119  0.000041   2.929187   3.398503e-03   0.000040   0.000199
pre                                      0.233074  0.016260  14.334505   1.331920e-46   0.201206   0.264942
pre_pre_interaction                     -0.014487  0.001155 -12.542262   4.383037e-36  -0.016751  -0.012223
In [11]:
#model 8

# Filter data for rainfed.
# .copy() makes `data` an independent frame, so the column assignments below
# no longer trigger pandas' SettingWithCopyWarning.
data = df_soybean[df_soybean['group'] == 2].copy()

# Prepare the interaction terms (precipitation)
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']

# Prepare the interaction terms (temperature)
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']

# Define the formula for the regression model (C(geoid) adds one dummy per geoid)
formula = 'lnyield ~ lnfer  + tmp + tmp_tmp_interaction + pre +pre_pre_interaction+year+ lnfer_tmp_interaction+lnfer_tmp_tmp_interaction +lnfer_pre_interaction+ lnfer_pre_pre_interaction + C(geoid)'

# Run the regression using the formula interface with standard errors clustered on geoid
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})

# Extract the coefficient table
summary = model.summary2().tables[1]

# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary8 = summary.loc[~summary.index.str.contains(pattern)]

# Build the panel frame for PanelOLS: set_index moves 'year' into the index,
# so a copy of it is kept as the regressor column 'trend'.
data['trend'] = data['year']
data1 = data.set_index(['geoid', 'year'])

# Define the formula for the fixed effects regression model with PanelOLS
formula_fe = (
    'lnyield ~ lnfer  + tmp + tmp_tmp_interaction + pre +pre_pre_interaction+trend+ lnfer_tmp_interaction+lnfer_tmp_tmp_interaction +lnfer_pre_interaction+ lnfer_pre_pre_interaction + EntityEffects '
)
# Fit the fixed effects model using PanelOLS; in this cell only its
# rsquared_within is read, the coefficients reported come from `model` above.
fe_model = PanelOLS.from_formula(formula_fe, data=data1)
fe_results = fe_model.fit(cov_type='robust')

# Format the OLS summary with the within R-squared.
# NOTE(review): with the clustered covariance statsmodels warns that the
# covariance of constraints is rank-deficient for the joint test, so the
# F-statistic / Prob (F-statistic) lines printed here should not be relied on.
ols_info = f"""
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       {model.rsquared:.3f}
Model:                            OLS   Adj. R-squared:                  {model.rsquared_adj:.3f}
Within R-squared:               {fe_results.rsquared_within:.3f}
Method:                 Least Squares   F-statistic:                     {model.fvalue:.1f}
Date:                {datetime.now():%a, %d %b %Y}   Prob (F-statistic):               {model.f_pvalue:.2e}
Time:                        {datetime.now():%H:%M:%S}   Log-Likelihood:                 {model.llf:.2f}
No. Observations:                {model.nobs:.0f}   AIC:                            {model.aic:.1f}
Df Residuals:                    {model.df_resid:.0f}   BIC:                             {model.bic:.1f}
Df Model:                         {model.df_model:.0f}                                         
Covariance Type:               cluster                             
==============================================================================
"""

# Combine the OLS info with the filtered summary
final_summary8 = ols_info + "\n" + filtered_summary8.to_string()

# Print the final summary
print(final_summary8)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/4225165077.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['pre_pre_interaction'] = data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/4225165077.py:8: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/4225165077.py:9: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/4225165077.py:11: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/4225165077.py:12: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/4225165077.py:13: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       0.749
Model:                            OLS   Adj. R-squared:                  0.701
Within R-squared:               0.340
Method:                 Least Squares   F-statistic:                     -1331501833556.8
Date:                Fri, 15 Nov 2024   Prob (F-statistic):               1.00e+00
Time:                        11:18:44   Log-Likelihood:                 5793.48
No. Observations:                10075   AIC:                            -8409.0
Df Residuals:                    8486   BIC:                             3060.1
Df Model:                         1588                                         
Covariance Type:               cluster                             
==============================================================================

                               Coef.  Std.Err.          z          P>|z|     [0.025     0.975]
Intercept                 -31.351247  1.147194 -27.328638  1.937995e-164 -33.599705 -29.102788
lnfer                      -1.437603  0.230179  -6.245593   4.221942e-10  -1.888745  -0.986461
tmp                         0.162487  0.050122   3.241813   1.187719e-03   0.064249   0.260725
tmp_tmp_interaction        -0.005111  0.001307  -3.910844   9.197420e-05  -0.007673  -0.002550
pre                         0.137877  0.022308   6.180549   6.387919e-10   0.094154   0.181601
pre_pre_interaction        -0.008889  0.001644  -5.407629   6.386448e-08  -0.012111  -0.005667
year                        0.018801  0.000549  34.229622  8.768058e-257   0.017724   0.019877
lnfer_tmp_interaction       0.110635  0.022230   4.976855   6.462567e-07   0.067065   0.154204
lnfer_tmp_tmp_interaction  -0.002177  0.000555  -3.926001   8.636975e-05  -0.003264  -0.001090
lnfer_pre_interaction       0.028565  0.012511   2.283232   2.241673e-02   0.004044   0.053085
lnfer_pre_pre_interaction  -0.001607  0.000873  -1.841397   6.556340e-02  -0.003317   0.000103
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1588, but rank is 10
  warnings.warn('covariance of constraints does not have full '
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/4225165077.py:29: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1['trend']=data1['year']
In [12]:
# Collects one formatted results table per model, keyed by model label
results_dict = dict()

def add_significance_asterisks(coef, p_value):
    """Return the coefficient rounded to 3 decimals, suffixed with significance stars.

    Stars are chosen from the p-value: '***' for p <= 0.01, '**' for p <= 0.05,
    '*' for p <= 0.1, and no suffix otherwise.
    """
    stars = ""
    # Thresholds are checked from strictest to loosest; the first match wins.
    for cutoff, marker in ((0.01, "***"), (0.05, "**"), (0.1, "*")):
        if p_value <= cutoff:
            stars = marker
            break
    return f"{round(coef, 3)}{stars}"

# Reshape one model's coefficient table into long (Variable, Metric) form
def process_summary(summary, model_name):
    """Turn a coefficient table into a long-format table of display values.

    Parameters
    ----------
    summary : DataFrame whose index holds the variable names and which has the
        columns 'Coef.', 'Std.Err.' and 'P>|z|'.
    model_name : not used inside the function; kept so existing calls still work.

    Returns
    -------
    DataFrame indexed by ('Variable', 'Metric') with a single 'Value' column.
    Each variable contributes three rows: the starred coefficient, the
    standard error in parentheses, and the p-value rounded to 3 decimals.
    """
    records = []
    for variable, row in summary.iterrows():
        p_value = row['P>|z|']
        records.extend([
            (variable, 'Coef.', add_significance_asterisks(row['Coef.'], p_value)),
            (variable, 'Std.Err.', f"({round(row['Std.Err.'], 3)})"),
            (variable, 'P>|z|', round(p_value, 3)),
        ])

    long_table = pd.DataFrame(records, columns=['Variable', 'Metric', 'Value'])
    return long_table.set_index(['Variable', 'Metric'])

# Coefficient tables of models 1-8, in the order they become columns Model_1..Model_8
filtered_summaries = [filtered_summary1, filtered_summary2, filtered_summary3, filtered_summary4, 
                   filtered_summary5, filtered_summary6, filtered_summary7, filtered_summary8]

# Format each table and store it under its model label
for i, summary in enumerate(filtered_summaries, start=1):
    model_label = f'Model_{i}'
    results_dict[model_label] = process_summary(summary, model_label)

# Concatenate all model results along columns
final_results = pd.concat(results_dict, axis=1)

# Export to Excel. The path lives in one variable so the confirmation message
# below always names the file that was actually written.
output_file = "model_summary_table12.xlsx"
final_results.to_excel(output_file)
print(final_results)
print(f"Model summary statistics with significance have been saved to {output_file}")
                                                    Model_1     Model_2  \
                                                      Value       Value   
Variable                               Metric                             
Intercept                              Coef.     -38.522***  -33.002***   
                                       Std.Err.     (1.519)     (1.074)   
                                       P>|z|            0.0         0.0   
lnfer                                  Coef.          0.006       0.003   
                                       Std.Err.      (0.01)     (0.008)   
                                       P>|z|          0.548       0.734   
irrigation1                            Coef.         -0.016         NaN   
                                       Std.Err.     (0.016)         NaN   
                                       P>|z|          0.326         NaN   
irrigation12                           Coef.        0.004**         NaN   
                                       Std.Err.     (0.002)         NaN   
                                       P>|z|          0.044         NaN   
tmp                                    Coef.       0.521***    0.287***   
                                       Std.Err.     (0.035)     (0.024)   
                                       P>|z|            0.0         0.0   
tmp_tmp_interaction                    Coef.      -0.013***   -0.007***   
                                       Std.Err.     (0.001)     (0.001)   
                                       P>|z|            0.0         0.0   
pre                                    Coef.       0.227***    0.181***   
                                       Std.Err.     (0.016)     (0.012)   
                                       P>|z|            0.0         0.0   
pre_pre_interaction                    Coef.      -0.014***   -0.011***   
                                       Std.Err.     (0.001)     (0.001)   
                                       P>|z|            0.0         0.0   
year                                   Coef.        0.02***    0.019***   
                                       Std.Err.     (0.001)     (0.001)   
                                       P>|z|            0.0         0.0   
lnfer_irrigation1_tmp_interaction      Coef.            NaN         NaN   
                                       Std.Err.         NaN         NaN   
                                       P>|z|            NaN         NaN   
lnfer_irrigation12_tmp_tmp_interaction Coef.            NaN         NaN   
                                       Std.Err.         NaN         NaN   
                                       P>|z|            NaN         NaN   
lnfer_tmp_interaction                  Coef.            NaN         NaN   
                                       Std.Err.         NaN         NaN   
                                       P>|z|            NaN         NaN   
lnfer_tmp_tmp_interaction              Coef.            NaN         NaN   
                                       Std.Err.         NaN         NaN   
                                       P>|z|            NaN         NaN   
lnfer_irrigation1_pre_interaction      Coef.            NaN         NaN   
                                       Std.Err.         NaN         NaN   
                                       P>|z|            NaN         NaN   
lnfer_irrigation12_pre_pre_interaction Coef.            NaN         NaN   
                                       Std.Err.         NaN         NaN   
                                       P>|z|            NaN         NaN   
lnfer_pre_interaction                  Coef.            NaN         NaN   
                                       Std.Err.         NaN         NaN   
                                       P>|z|            NaN         NaN   
lnfer_pre_pre_interaction              Coef.            NaN         NaN   
                                       Std.Err.         NaN         NaN   
                                       P>|z|            NaN         NaN   

                                                    Model_3     Model_4  \
                                                      Value       Value   
Variable                               Metric                             
Intercept                              Coef.     -38.321***  -31.877***   
                                       Std.Err.      (1.51)      (1.13)   
                                       P>|z|            0.0         0.0   
lnfer                                  Coef.          0.011    -1.44***   
                                       Std.Err.      (0.01)     (0.225)   
                                       P>|z|          0.278         0.0   
irrigation1                            Coef.           0.05         NaN   
                                       Std.Err.     (0.045)         NaN   
                                       P>|z|          0.265         NaN   
irrigation12                           Coef.         -0.005         NaN   
                                       Std.Err.     (0.007)         NaN   
                                       P>|z|          0.446         NaN   
tmp                                    Coef.        0.52***    0.142***   
                                       Std.Err.     (0.035)      (0.05)   
                                       P>|z|            0.0       0.005   
tmp_tmp_interaction                    Coef.      -0.013***   -0.005***   
                                       Std.Err.     (0.001)     (0.001)   
                                       P>|z|            0.0         0.0   
pre                                    Coef.       0.228***    0.173***   
                                       Std.Err.     (0.016)     (0.012)   
                                       P>|z|            0.0         0.0   
pre_pre_interaction                    Coef.      -0.014***   -0.011***   
                                       Std.Err.     (0.001)     (0.001)   
                                       P>|z|            0.0         0.0   
year                                   Coef.        0.02***    0.019***   
                                       Std.Err.     (0.001)     (0.001)   
                                       P>|z|            0.0         0.0   
lnfer_irrigation1_tmp_interaction      Coef.         -0.001         NaN   
                                       Std.Err.     (0.001)         NaN   
                                       P>|z|          0.101         NaN   
lnfer_irrigation12_tmp_tmp_interaction Coef.            0.0         NaN   
                                       Std.Err.       (0.0)         NaN   
                                       P>|z|          0.179         NaN   
lnfer_tmp_interaction                  Coef.            NaN    0.122***   
                                       Std.Err.         NaN     (0.023)   
                                       P>|z|            NaN         0.0   
lnfer_tmp_tmp_interaction              Coef.            NaN   -0.002***   
                                       Std.Err.         NaN     (0.001)   
                                       P>|z|            NaN         0.0   
lnfer_irrigation1_pre_interaction      Coef.            NaN         NaN   
                                       Std.Err.         NaN         NaN   
                                       P>|z|            NaN         NaN   
lnfer_irrigation12_pre_pre_interaction Coef.            NaN         NaN   
                                       Std.Err.         NaN         NaN   
                                       P>|z|            NaN         NaN   
lnfer_pre_interaction                  Coef.            NaN         NaN   
                                       Std.Err.         NaN         NaN   
                                       P>|z|            NaN         NaN   
lnfer_pre_pre_interaction              Coef.            NaN         NaN   
                                       Std.Err.         NaN         NaN   
                                       P>|z|            NaN         NaN   

                                                    Model_5     Model_6  \
                                                      Value       Value   
Variable                               Metric                             
Intercept                              Coef.     -38.309***  -32.266***   
                                       Std.Err.     (1.504)     (1.082)   
                                       P>|z|            0.0         0.0   
lnfer                                  Coef.          0.013    -0.098**   
                                       Std.Err.      (0.01)     (0.044)   
                                       P>|z|          0.204       0.027   
irrigation1                            Coef.         0.051*         NaN   
                                       Std.Err.     (0.027)         NaN   
                                       P>|z|           0.06         NaN   
irrigation12                           Coef.         -0.001         NaN   
                                       Std.Err.     (0.002)         NaN   
                                       P>|z|          0.776         NaN   
tmp                                    Coef.       0.527***    0.288***   
                                       Std.Err.     (0.035)     (0.024)   
                                       P>|z|            0.0         0.0   
tmp_tmp_interaction                    Coef.      -0.013***   -0.007***   
                                       Std.Err.     (0.001)     (0.001)   
                                       P>|z|            0.0         0.0   
pre                                    Coef.       0.232***    0.152***   
                                       Std.Err.     (0.016)     (0.023)   
                                       P>|z|            0.0         0.0   
pre_pre_interaction                    Coef.      -0.015***    -0.01***   
                                       Std.Err.     (0.001)     (0.002)   
                                       P>|z|            0.0         0.0   
year                                   Coef.        0.02***    0.018***   
                                       Std.Err.     (0.001)     (0.001)   
                                       P>|z|            0.0         0.0   
lnfer_irrigation1_tmp_interaction      Coef.            NaN         NaN   
                                       Std.Err.         NaN         NaN   
                                       P>|z|            NaN         NaN   
lnfer_irrigation12_tmp_tmp_interaction Coef.            NaN         NaN   
                                       Std.Err.         NaN         NaN   
                                       P>|z|            NaN         NaN   
lnfer_tmp_interaction                  Coef.            NaN         NaN   
                                       Std.Err.         NaN         NaN   
                                       P>|z|            NaN         NaN   
lnfer_tmp_tmp_interaction              Coef.            NaN         NaN   
                                       Std.Err.         NaN         NaN   
                                       P>|z|            NaN         NaN   
lnfer_irrigation1_pre_interaction      Coef.      -0.006***         NaN   
                                       Std.Err.     (0.002)         NaN   
                                       P>|z|          0.002         NaN   
lnfer_irrigation12_pre_pre_interaction Coef.          0.0**         NaN   
                                       Std.Err.       (0.0)         NaN   
                                       P>|z|          0.037         NaN   
lnfer_pre_interaction                  Coef.            NaN      0.025*   
                                       Std.Err.         NaN     (0.013)   
                                       P>|z|            NaN       0.052   
lnfer_pre_pre_interaction              Coef.            NaN      -0.001   
                                       Std.Err.         NaN     (0.001)   
                                       P>|z|            NaN       0.149   

                                                    Model_7     Model_8  
                                                      Value       Value  
Variable                               Metric                            
Intercept                              Coef.     -38.381***  -31.351***  
                                       Std.Err.     (1.507)     (1.147)  
                                       P>|z|            0.0         0.0  
lnfer                                  Coef.          0.011   -1.438***  
                                       Std.Err.      (0.01)      (0.23)  
                                       P>|z|          0.271         0.0  
irrigation1                            Coef.          0.024         NaN  
                                       Std.Err.     (0.044)         NaN  
                                       P>|z|          0.581         NaN  
irrigation12                           Coef.         -0.002         NaN  
                                       Std.Err.     (0.007)         NaN  
                                       P>|z|          0.736         NaN  
tmp                                    Coef.       0.539***    0.162***  
                                       Std.Err.     (0.035)      (0.05)  
                                       P>|z|            0.0       0.001  
tmp_tmp_interaction                    Coef.      -0.014***   -0.005***  
                                       Std.Err.     (0.001)     (0.001)  
                                       P>|z|            0.0         0.0  
pre                                    Coef.       0.233***    0.138***  
                                       Std.Err.     (0.016)     (0.022)  
                                       P>|z|            0.0         0.0  
pre_pre_interaction                    Coef.      -0.014***   -0.009***  
                                       Std.Err.     (0.001)     (0.002)  
                                       P>|z|            0.0         0.0  
year                                   Coef.        0.02***    0.019***  
                                       Std.Err.     (0.001)     (0.001)  
                                       P>|z|            0.0         0.0  
lnfer_irrigation1_tmp_interaction      Coef.         0.002*         NaN  
                                       Std.Err.     (0.001)         NaN  
                                       P>|z|          0.099         NaN  
lnfer_irrigation12_tmp_tmp_interaction Coef.           -0.0         NaN  
                                       Std.Err.       (0.0)         NaN  
                                       P>|z|          0.682         NaN  
lnfer_tmp_interaction                  Coef.            NaN    0.111***  
                                       Std.Err.         NaN     (0.022)  
                                       P>|z|            NaN         0.0  
lnfer_tmp_tmp_interaction              Coef.            NaN   -0.002***  
                                       Std.Err.         NaN     (0.001)  
                                       P>|z|            NaN         0.0  
lnfer_irrigation1_pre_interaction      Coef.      -0.013***         NaN  
                                       Std.Err.     (0.004)         NaN  
                                       P>|z|          0.001         NaN  
lnfer_irrigation12_pre_pre_interaction Coef.         0.0***         NaN  
                                       Std.Err.       (0.0)         NaN  
                                       P>|z|          0.003         NaN  
lnfer_pre_interaction                  Coef.            NaN     0.029**  
                                       Std.Err.         NaN     (0.013)  
                                       P>|z|            NaN       0.022  
lnfer_pre_pre_interaction              Coef.            NaN     -0.002*  
                                       Std.Err.         NaN     (0.001)  
                                       P>|z|            NaN       0.066  
Model summary statistics with significance have been saved to model_summary_statistics_with_significance.xlsx

Table S14

In [13]:
#model 1

# Filter data for irrigated.
# .copy() makes `data` an independent frame, so the column assignments below
# no longer trigger pandas' SettingWithCopyWarning.
data = df_soybean[df_soybean['group'] == 1].copy()

# Prepare the interaction terms.
# The lnfer_irrigation* columns are not referenced by this cell's formula;
# they are still created so `data` carries the same columns as before.
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']

data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']

# NOTE(review): year_geoid is the numeric product year * geoid and enters the
# model as a single regressor. If county-specific trends are intended, confirm
# this is the desired specification.
data['year_geoid'] = data['year'] * data['geoid']

# Define the formula for the regression model (C(geoid) adds one dummy per geoid)
formula = 'lnyield ~ lnfer + irrigation1 + irrigation12 + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +year + year_geoid+ C(geoid)'

# Run the regression using the formula interface with standard errors clustered on geoid
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})

# Extract the coefficient table
summary = model.summary2().tables[1]

# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary1 = summary.loc[~summary.index.str.contains(pattern)]

# Baseline model containing only the geoid dummies
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()

# Residual sum of squares of the dummies-only model: the variation left after
# removing the geoid effects, used as the "total" for the within R-squared
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)

# Residual sum of squares from the main model
rss_within = np.sum((model.resid) ** 2)

# Within R-squared: share of the post-fixed-effects variation explained by the regressors
within_r2 = 1 - (rss_within / tss_within)

# Format the OLS summary with the within R-squared.
# NOTE(review): with the clustered covariance statsmodels warns that the
# covariance of constraints is rank-deficient for the joint test, so the
# F-statistic / Prob (F-statistic) lines printed here should not be relied on.
ols_info = f"""
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       {model.rsquared:.3f}
Model:                            OLS   Adj. R-squared:                  {model.rsquared_adj:.3f}
Within R-squared:                 {within_r2:.3f}
Method:                 Least Squares   F-statistic:                     {model.fvalue:.1f}
Date:                {datetime.now():%a, %d %b %Y}   Prob (F-statistic):               {model.f_pvalue:.2e}
Time:                        {datetime.now():%H:%M:%S}   Log-Likelihood:                 {model.llf:.2f}
No. Observations:                {model.nobs:.0f}   AIC:                            {model.aic:.1f}
Df Residuals:                    {model.df_resid:.0f}   BIC:                             {model.bic:.1f}
Df Model:                         {model.df_model:.0f}                                         
Covariance Type:               cluster                             
==============================================================================
"""

# Combine the OLS info with the filtered summary
final_summary1 = ols_info + "\n" + filtered_summary1.to_string()

# Print the final summary
print(final_summary1)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1789790976.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['pre_pre_interaction'] = data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1789790976.py:8: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['irrigation12'] = data['irrigation1'] * data['irrigation1']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1789790976.py:9: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1789790976.py:10: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1789790976.py:12: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1789790976.py:13: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1789790976.py:14: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/1789790976.py:16: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['year_geoid'] = data['year'] * data['geoid']
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1258, but rank is 9
  warnings.warn('covariance of constraints does not have full '
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       0.737
Model:                            OLS   Adj. R-squared:                  0.684
Within R-squared:                 0.315
Method:                 Least Squares   F-statistic:                     231973491.2
Date:                Fri, 15 Nov 2024   Prob (F-statistic):               0.00e+00
Time:                        11:18:49   Log-Likelihood:                 3532.31
No. Observations:                7507   AIC:                            -4546.6
Df Residuals:                    6248   BIC:                             4170.2
Df Model:                         1258                                         
Covariance Type:               cluster                             
==============================================================================

                            Coef.      Std.Err.          z         P>|z|        [0.025        0.975]
Intercept           -3.993741e+01  3.532505e+00 -11.305690  1.229830e-29 -4.686100e+01 -3.301383e+01
lnfer                6.212019e-03  1.003526e-02   0.619019  5.359038e-01 -1.345673e-02  2.588077e-02
irrigation1         -1.579472e-02  1.624024e-02  -0.972567  3.307686e-01 -4.762499e-02  1.603556e-02
irrigation12         3.696676e-03  1.836165e-03   2.013259  4.408741e-02  9.785812e-05  7.295494e-03
tmp                  5.204956e-01  3.507404e-02  14.839911  8.086338e-50  4.517517e-01  5.892394e-01
tmp_tmp_interaction -1.331900e-02  8.415018e-04 -15.827655  2.005586e-56 -1.496831e-02 -1.166969e-02
pre                  2.269818e-01  1.626871e-02  13.952047  3.057039e-44  1.950957e-01  2.588678e-01
pre_pre_interaction -1.425789e-02  1.160790e-03 -12.282920  1.118853e-34 -1.653300e-02 -1.198279e-02
year                 2.083987e-02  1.813570e-03  11.491076  1.462790e-30  1.728534e-02  2.439440e-02
year_geoid          -2.662822e-08  5.888926e-08  -0.452175  6.511433e-01 -1.420491e-07  8.879261e-08
In [14]:
#model 2
# Rainfed baseline: regress lnyield on fertilizer, quadratic temperature and
# precipitation, a year trend, a year*geoid term and geoid dummies, then print
# a compact summary that hides the geoid dummy rows.

# Filter data for rainfed (group == 2).
# Take an explicit copy: the derived columns below are then written to an
# independent frame rather than to a slice of df_soybean, which is what
# raised SettingWithCopyWarning on every assignment.
data = df_soybean[df_soybean['group'] == 2].copy()

# Prepare the squared and interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']

data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
# NOTE(review): this multiplies year by the numeric geoid code, giving one
# continuous regressor; it is not a per-geoid trend -- confirm this is intended.
data['year_geoid'] = data['year'] * data['geoid']
# 'year' becomes an index level for PanelOLS below, so keep a regular-column
# copy of it that the panel formula can reference.
data['trend'] = data['year']

# Define the formula for the regression model
formula = 'lnyield ~ lnfer  + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +year + year_geoid+ C(geoid)'

# Run the regression using the formula interface with standard errors clustered by geoid
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})

# Extract the coefficient table
summary = model.summary2().tables[1]

# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary2 = summary.loc[~summary.index.str.contains(pattern)]

# Panel view of the same data: (geoid, year) MultiIndex as required by PanelOLS
data1 = data.set_index(['geoid','year'])
# Define the formula for the fixed effects regression model with PanelOLS
formula_fe = (
    'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +trend+ year_geoid+ EntityEffects '
)

# Fit the fixed effects model using PanelOLS.
# Only rsquared_within is read from this fit; the coefficients and standard
# errors reported below come from the clustered OLS model above.
fe_model = PanelOLS.from_formula(formula_fe, data=data1)
fe_results = fe_model.fit(cov_type='robust')

# Single timestamp so the Date and Time fields of the header always agree
report_time = datetime.now()

# Format the OLS summary with the within R-squared.
# NOTE: statsmodels warns that the constraint covariance is rank-deficient for
# this fit, so the F-statistic printed in the header should be read with care.
ols_info = f"""
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       {model.rsquared:.3f}
Model:                            OLS   Adj. R-squared:                  {model.rsquared_adj:.3f}
Within R-squared:                 {fe_results.rsquared_within:.3f}
Method:                 Least Squares   F-statistic:                     {model.fvalue:.1f}
Date:                {report_time:%a, %d %b %Y}   Prob (F-statistic):               {model.f_pvalue:.2e}
Time:                        {report_time:%H:%M:%S}   Log-Likelihood:                 {model.llf:.2f}
No. Observations:                {model.nobs:.0f}   AIC:                            {model.aic:.1f}
Df Residuals:                    {model.df_resid:.0f}   BIC:                             {model.bic:.1f}
Df Model:                         {model.df_model:.0f}                                         
Covariance Type:               cluster                             
==============================================================================
"""

# Combine the OLS info with the filtered summary
final_summary2 = ols_info + "\n" + filtered_summary2.to_string()

# Print the final summary
print(final_summary2)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/124981403.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['pre_pre_interaction'] = data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/124981403.py:8: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/124981403.py:9: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/124981403.py:11: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/124981403.py:12: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/124981403.py:13: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/124981403.py:14: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['year_geoid'] = data['year'] * data['geoid']
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       0.742
Model:                            OLS   Adj. R-squared:                  0.694
Within R-squared:                 0.322
Method:                 Least Squares   F-statistic:                     10177080987.7
Date:                Fri, 15 Nov 2024   Prob (F-statistic):               0.00e+00
Time:                        11:18:54   Log-Likelihood:                 5660.02
No. Observations:                10075   AIC:                            -8150.0
Df Residuals:                    8490   BIC:                             3290.2
Df Model:                         1584                                         
Covariance Type:               cluster                             
==============================================================================

                            Coef.      Std.Err.          z         P>|z|        [0.025        0.975]
Intercept           -4.615970e+01  2.443103e+00 -18.893877  1.280843e-79 -5.094809e+01 -4.137130e+01
lnfer                3.263710e-03  7.599801e-03   0.429447  6.675981e-01 -1.163163e-02  1.815905e-02
tmp                  2.946470e-01  2.363305e-02  12.467583  1.121769e-35  2.483271e-01  3.409669e-01
tmp_tmp_interaction -7.618231e-03  6.167671e-04 -12.351876  4.758848e-35 -8.827073e-03 -6.409390e-03
pre                  1.789964e-01  1.241803e-02  14.414236  4.210707e-47  1.546575e-01  2.033353e-01
pre_pre_interaction -1.109359e-02  8.885902e-04 -12.484486  9.072700e-36 -1.283520e-02 -9.351988e-03
year                 2.546785e-02  1.258349e-03  20.239096  4.432263e-91  2.300153e-02  2.793417e-02
year_geoid          -2.143577e-07  3.744462e-08  -5.724659  1.036415e-08 -2.877478e-07 -1.409676e-07
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1585, but rank is 7
  warnings.warn('covariance of constraints does not have full '
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/124981403.py:31: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1['trend']=data1['year']
In [15]:
#model 3
# Irrigated model with fertilizer x irrigation x temperature interactions:
# regress lnyield on the baseline terms plus lnfer*irrigation1*tmp and its
# squared-irrigation/squared-temperature counterpart, with geoid dummies.

# Filter data for irrigated (group == 1).
# Take an explicit copy: the derived columns below are then written to an
# independent frame rather than to a slice of df_soybean, which is what
# raised SettingWithCopyWarning on every assignment.
data = df_soybean[df_soybean['group'] == 1].copy()

# Prepare the squared and interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']

data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
# NOTE(review): this multiplies year by the numeric geoid code, giving one
# continuous regressor; it is not a per-geoid trend -- confirm this is intended.
data['year_geoid'] = data['year'] * data['geoid']

# Define the formula for the regression model
formula = 'lnyield ~ lnfer + irrigation1 + irrigation12 + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +year+lnfer_irrigation1_tmp_interaction+ lnfer_irrigation12_tmp_tmp_interaction+ year_geoid+ C(geoid)'

# Run the regression using the formula interface with standard errors clustered by geoid
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})

# Extract the coefficient table
summary = model.summary2().tables[1]

# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary3 = summary.loc[~summary.index.str.contains(pattern)]

# Reference model containing only the geoid dummies; its residuals are what is
# left of lnyield once the geoid-level means have been removed.
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()

# Within total sum of squares: residual variation of the dummies-only model
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)

# Residual sum of squares (RSS) from the main model
rss_within = np.sum((model.resid) ** 2)

# Within R-squared: share of the within-geoid variation explained by the
# non-dummy regressors
within_r2 = 1 - (rss_within / tss_within)

# Single timestamp so the Date and Time fields of the header always agree
report_time = datetime.now()

# Format the OLS summary with the within R-squared.
# NOTE: statsmodels warns that the constraint covariance is rank-deficient for
# this fit, so the F-statistic printed in the header should be read with care.
ols_info = f"""
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       {model.rsquared:.3f}
Model:                            OLS   Adj. R-squared:                  {model.rsquared_adj:.3f}
Within R-squared:                 {within_r2:.3f}
Method:                 Least Squares   F-statistic:                     {model.fvalue:.1f}
Date:                {report_time:%a, %d %b %Y}   Prob (F-statistic):               {model.f_pvalue:.2e}
Time:                        {report_time:%H:%M:%S}   Log-Likelihood:                 {model.llf:.2f}
No. Observations:                {model.nobs:.0f}   AIC:                            {model.aic:.1f}
Df Residuals:                    {model.df_resid:.0f}   BIC:                             {model.bic:.1f}
Df Model:                         {model.df_model:.0f}                                         
Covariance Type:               cluster                             
==============================================================================
"""

# Combine the OLS info with the filtered summary
final_summary3 = ols_info + "\n" + filtered_summary3.to_string()

# Print the final summary
print(final_summary3)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3002633633.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['pre_pre_interaction'] = data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3002633633.py:8: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['irrigation12'] = data['irrigation1'] * data['irrigation1']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3002633633.py:9: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3002633633.py:10: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3002633633.py:12: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3002633633.py:13: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3002633633.py:14: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3002633633.py:15: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['year_geoid'] = data['year'] * data['geoid']
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1260, but rank is 11
  warnings.warn('covariance of constraints does not have full '
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       0.737
Model:                            OLS   Adj. R-squared:                  0.684
Within R-squared:                 0.316
Method:                 Least Squares   F-statistic:                     6605558130.0
Date:                Fri, 15 Nov 2024   Prob (F-statistic):               0.00e+00
Time:                        11:18:59   Log-Likelihood:                 3536.22
No. Observations:                7507   AIC:                            -4550.4
Df Residuals:                    6246   BIC:                             4180.2
Df Model:                         1260                                         
Covariance Type:               cluster                             
==============================================================================

                                               Coef.      Std.Err.          z         P>|z|        [0.025        0.975]
Intercept                              -4.017184e+01  3.524811e+00 -11.396879  4.333771e-30 -4.708035e+01 -3.326334e+01
lnfer                                   1.117614e-02  9.967509e-03   1.121257  2.621785e-01 -8.359820e-03  3.071210e-02
irrigation1                             5.200204e-02  4.505940e-02   1.154078  2.484683e-01 -3.631275e-02  1.403168e-01
irrigation12                           -5.614136e-03  7.045565e-03  -0.796833  4.255483e-01 -1.942319e-02  8.194917e-03
tmp                                     5.187373e-01  3.482357e-02  14.896156  3.490943e-50  4.504844e-01  5.869902e-01
tmp_tmp_interaction                    -1.326847e-02  8.345532e-04 -15.898891  6.449685e-57 -1.490416e-02 -1.163278e-02
pre                                     2.280190e-01  1.639217e-02  13.910234  5.489920e-44  1.958909e-01  2.601470e-01
pre_pre_interaction                    -1.433377e-02  1.170808e-03 -12.242638  1.839547e-34 -1.662851e-02 -1.203903e-02
year                                    2.096292e-02  1.809563e-03  11.584521  4.937210e-31  1.741624e-02  2.450960e-02
lnfer_irrigation1_tmp_interaction      -1.295372e-03  7.666021e-04  -1.689758  9.107423e-02 -2.797885e-03  2.071404e-04
lnfer_irrigation12_tmp_tmp_interaction  7.094800e-06  5.124475e-06   1.384493  1.662075e-01 -2.948985e-06  1.713859e-05
year_geoid                             -3.491327e-08  5.879304e-08  -0.593833  5.526236e-01 -1.501455e-07  8.031897e-08
In [16]:
#model 4
# Rainfed model with fertilizer x temperature interactions: the baseline
# rainfed specification plus lnfer*tmp and lnfer*tmp^2, with geoid dummies.

# Filter data for rainfed (group == 2).
# Take an explicit copy: the derived columns below are then written to an
# independent frame rather than to a slice of df_soybean, which is what
# raised SettingWithCopyWarning on every assignment.
data = df_soybean[df_soybean['group'] == 2].copy()

# Prepare the squared and interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']

data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
# NOTE(review): this multiplies year by the numeric geoid code, giving one
# continuous regressor; it is not a per-geoid trend -- confirm this is intended.
data['year_geoid'] = data['year'] * data['geoid']
# 'year' becomes an index level for PanelOLS below, so keep a regular-column
# copy of it that the panel formula can reference.
data['trend'] = data['year']

# Define the formula for the regression model
formula = 'lnyield ~ lnfer  + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +year+ year_geoid +lnfer_tmp_interaction+ lnfer_tmp_tmp_interaction + C(geoid)'

# Run the regression using the formula interface with standard errors clustered by geoid
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})

# Extract the coefficient table
summary = model.summary2().tables[1]

# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary4 = summary.loc[~summary.index.str.contains(pattern)]

# Panel view of the same data: (geoid, year) MultiIndex as required by PanelOLS
data1 = data.set_index(['geoid','year'])
# Define the formula for the fixed effects regression model with PanelOLS
formula_fe = (
    'lnyield ~ lnfer  + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +lnfer_tmp_interaction+ lnfer_tmp_tmp_interaction +trend+ year_geoid+ EntityEffects '
)
# Fit the fixed effects model using PanelOLS.
# Only rsquared_within is read from this fit; the coefficients and standard
# errors reported below come from the clustered OLS model above.
fe_model = PanelOLS.from_formula(formula_fe, data=data1)
fe_results = fe_model.fit(cov_type='robust')

# Single timestamp so the Date and Time fields of the header always agree
report_time = datetime.now()

# Format the OLS summary with the within R-squared.
# NOTE: statsmodels warns that the constraint covariance is rank-deficient for
# this fit, so the F-statistic printed in the header should be read with care.
ols_info = f"""
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       {model.rsquared:.3f}
Model:                            OLS   Adj. R-squared:                  {model.rsquared_adj:.3f}
Within R-squared:                 {fe_results.rsquared_within:.3f}
Method:                 Least Squares   F-statistic:                     {model.fvalue:.1f}
Date:                {report_time:%a, %d %b %Y}   Prob (F-statistic):               {model.f_pvalue:.2e}
Time:                        {report_time:%H:%M:%S}   Log-Likelihood:                 {model.llf:.2f}
No. Observations:                {model.nobs:.0f}   AIC:                            {model.aic:.1f}
Df Residuals:                    {model.df_resid:.0f}   BIC:                             {model.bic:.1f}
Df Model:                         {model.df_model:.0f}                                         
Covariance Type:               cluster                             
==============================================================================
"""

# Combine the OLS info with the filtered summary
final_summary4 = ols_info + "\n" + filtered_summary4.to_string()

# Print the final summary
print(final_summary4)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3865254653.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['pre_pre_interaction'] = data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3865254653.py:8: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3865254653.py:9: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3865254653.py:11: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3865254653.py:12: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3865254653.py:13: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3865254653.py:14: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['year_geoid'] = data['year'] * data['geoid']
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       0.748
Model:                            OLS   Adj. R-squared:                  0.701
Within R-squared:                 0.338
Method:                 Least Squares   F-statistic:                     1586871134.5
Date:                Fri, 15 Nov 2024   Prob (F-statistic):               0.00e+00
Time:                        11:19:05   Log-Likelihood:                 5776.80
No. Observations:                10075   AIC:                            -8379.6
Df Residuals:                    8488   BIC:                             3075.1
Df Model:                         1586                                         
Covariance Type:               cluster                             
==============================================================================

                                  Coef.      Std.Err.          z         P>|z|        [0.025        0.975]
Intercept                 -4.159007e+01  2.427799e+00 -17.130776  8.748001e-66 -4.634847e+01 -3.683168e+01
lnfer                     -1.348237e+00  2.249930e-01  -5.992351  2.068283e-09 -1.789216e+00 -9.072591e-01
tmp                        1.606939e-01  4.986077e-02   3.222853  1.269207e-03  6.296861e-02  2.584192e-01
tmp_tmp_interaction       -5.044027e-03  1.299541e-03  -3.881390  1.038610e-04 -7.591082e-03 -2.496973e-03
pre                        1.720517e-01  1.217527e-02  14.131246  2.437921e-45  1.481886e-01  1.959148e-01
pre_pre_interaction       -1.061143e-02  8.737040e-04 -12.145343  6.072751e-34 -1.232386e-02 -8.899006e-03
year                       2.397302e-02  1.223615e-03  19.591973  1.810374e-85  2.157478e-02  2.637126e-02
year_geoid                -1.563708e-07  3.726879e-08  -4.195757  2.719611e-05 -2.294163e-07 -8.332531e-08
lnfer_tmp_interaction      1.134965e-01  2.271550e-02   4.996433  5.840036e-07  6.897492e-02  1.580180e-01
lnfer_tmp_tmp_interaction -2.264421e-03  5.672504e-04  -3.991926  6.553891e-05 -3.376212e-03 -1.152631e-03
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1587, but rank is 9
  warnings.warn('covariance of constraints does not have full '
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3865254653.py:31: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1['trend']=data1['year']
In [17]:
#model 5
# Irrigated counties: yield response with fertilizer x irrigation x precipitation
# interactions, county fixed effects via C(geoid), SEs clustered by geoid.

# Filter data for irrigated (group == 1).
# .copy() makes `data` an independent frame so the column assignments below are
# well-defined and no longer raise SettingWithCopyWarning.
data = df_soybean[df_soybean['group'] == 1].copy()

# Prepare the squared and interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']

data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
# The two lnfer x irrigation x tmp columns are built for parity with the other
# model cells; this model's formula does not use them.
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
# NOTE(review): this multiplies year by the numeric county code; if county-specific
# trends were intended, confirm this is the desired specification.
data['year_geoid'] = data['year'] * data['geoid']

# Define the formula for the regression model
formula = 'lnyield ~ lnfer + irrigation1 + irrigation12 + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +year + year_geoid+lnfer_irrigation1_pre_interaction+ lnfer_irrigation12_pre_pre_interaction+  C(geoid)'

# Run the regression using the formula interface with clustered standard errors
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})

# Extract the coefficient table
summary = model.summary2().tables[1]

# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary5 = summary.loc[~summary.index.str.contains(pattern)]

# Baseline model with county fixed effects only; its residual sum of squares is
# the within-county variation that the regressors could explain.
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()

# Within total sum of squares: residual SS of the fixed-effects-only model
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)

# Residual sum of squares (RSS) from the main model
rss_within = np.sum((model.resid) ** 2)

# Calculate the within R-squared
within_r2 = 1 - (rss_within / tss_within)

# Format the OLS summary with the within R-squared.
# NOTE: with cluster-robust covariance and one dummy per geoid, statsmodels warns
# that the constraint covariance is rank-deficient, so model.fvalue / f_pvalue
# printed below are not reliable and should not be interpreted.
ols_info = f"""
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       {model.rsquared:.3f}
Model:                            OLS   Adj. R-squared:                  {model.rsquared_adj:.3f}
Within R-squared:                 {within_r2:.3f}
Method:                 Least Squares   F-statistic:                     {model.fvalue:.1f}
Date:                {datetime.now():%a, %d %b %Y}   Prob (F-statistic):               {model.f_pvalue:.2e}
Time:                        {datetime.now():%H:%M:%S}   Log-Likelihood:                 {model.llf:.2f}
No. Observations:                {model.nobs:.0f}   AIC:                            {model.aic:.1f}
Df Residuals:                    {model.df_resid:.0f}   BIC:                             {model.bic:.1f}
Df Model:                         {model.df_model:.0f}                                         
Covariance Type:               cluster                             
==============================================================================
"""

# Combine the OLS info with the filtered summary
final_summary5 = ols_info + "\n" + filtered_summary5.to_string()

# Print the final summary
print(final_summary5)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/831502823.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['pre_pre_interaction'] = data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/831502823.py:8: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['irrigation12'] = data['irrigation1'] * data['irrigation1']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/831502823.py:9: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/831502823.py:10: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/831502823.py:12: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/831502823.py:13: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/831502823.py:14: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/831502823.py:15: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['year_geoid'] = data['year'] * data['geoid']
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1260, but rank is 11
  warnings.warn('covariance of constraints does not have full '
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       0.738
Model:                            OLS   Adj. R-squared:                  0.685
Within R-squared:                 0.317
Method:                 Least Squares   F-statistic:                     -1021640359.7
Date:                Fri, 15 Nov 2024   Prob (F-statistic):               1.00e+00
Time:                        11:19:10   Log-Likelihood:                 3546.14
No. Observations:                7507   AIC:                            -4570.3
Df Residuals:                    6246   BIC:                             4160.4
Df Model:                         1260                                         
Covariance Type:               cluster                             
==============================================================================

                                               Coef.      Std.Err.          z         P>|z|        [0.025        0.975]
Intercept                              -4.025514e+01  3.530404e+00 -11.402419  4.066618e-30 -4.717460e+01 -3.333568e+01
lnfer                                   1.303281e-02  1.003026e-02   1.299349  1.938242e-01 -6.626142e-03  3.269176e-02
irrigation1                             5.221407e-02  2.742767e-02   1.903701  5.694919e-02 -1.543174e-03  1.059713e-01
irrigation12                           -7.103953e-04  2.290441e-03  -0.310157  7.564419e-01 -5.199576e-03  3.778786e-03
tmp                                     5.261096e-01  3.503865e-02  15.015122  5.845459e-51  4.574351e-01  5.947841e-01
tmp_tmp_interaction                    -1.344705e-02  8.403977e-04 -16.000815  1.261141e-57 -1.509420e-02 -1.179990e-02
pre                                     2.314217e-01  1.639455e-02  14.115773  3.036694e-45  1.992890e-01  2.635544e-01
pre_pre_interaction                    -1.448400e-02  1.167307e-03 -12.408039  2.363713e-35 -1.677188e-02 -1.219612e-02
year                                    2.096354e-02  1.811326e-03  11.573585  5.608911e-31  1.741340e-02  2.451367e-02
year_geoid                             -3.666940e-08  5.900941e-08  -0.621416  5.343259e-01 -1.523257e-07  7.898692e-08
lnfer_irrigation1_pre_interaction      -6.158405e-03  1.981774e-03  -3.107522  1.886631e-03 -1.004261e-02 -2.274200e-03
lnfer_irrigation12_pre_pre_interaction  7.054057e-05  3.345749e-05   2.108364  3.499950e-02  4.965096e-06  1.361160e-04
In [18]:
#model 6
# Rainfed counties: yield response with fertilizer x precipitation interactions,
# county fixed effects via C(geoid), SEs clustered by geoid. A PanelOLS fit of the
# same specification is used only to obtain the within R-squared.

# Filter data for rainfed (group == 2).
# .copy() makes `data` an independent frame so the column assignments below are
# well-defined and no longer raise SettingWithCopyWarning.
data = df_soybean[df_soybean['group'] == 2].copy()

# Prepare the squared and interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']

data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
# The two lnfer x tmp columns are built for parity with the other model cells;
# this model's formula does not use them.
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
# NOTE(review): this multiplies year by the numeric county code; if county-specific
# trends were intended, confirm this is the desired specification.
data['year_geoid'] = data['year'] * data['geoid']
# 'year' becomes part of the panel index below, so keep a regular-column copy
# that the PanelOLS formula can reference as the time trend.
data['trend'] = data['year']

# Define the formula for the regression model
formula = 'lnyield ~ lnfer  + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +year+ year_geoid+lnfer_pre_interaction+ lnfer_pre_pre_interaction + C(geoid)'

# Run the regression using the formula interface with clustered standard errors
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})

# Extract the coefficient table
summary = model.summary2().tables[1]

# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary6 = summary.loc[~summary.index.str.contains(pattern)]

# Panel-indexed frame (entity = geoid, time = year) for PanelOLS.
# set_index returns a new frame, so `data` itself is left untouched.
data1 = data.set_index(['geoid','year'])
# Define the formula for the fixed effects regression model with PanelOLS
formula_fe = (
    'lnyield ~ lnfer  + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +lnfer_pre_interaction+ lnfer_pre_pre_interaction +trend+ year_geoid+ EntityEffects '
)
# Fit the fixed effects model using PanelOLS (only rsquared_within is used below)
fe_model = PanelOLS.from_formula(formula_fe, data=data1)
fe_results = fe_model.fit(cov_type='robust')


# Format the OLS summary with the within R-squared.
# NOTE: with cluster-robust covariance and one dummy per geoid, statsmodels warns
# that the constraint covariance is rank-deficient, so model.fvalue / f_pvalue
# printed below are not reliable and should not be interpreted.
ols_info = f"""
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       {model.rsquared:.3f}
Model:                            OLS   Adj. R-squared:                  {model.rsquared_adj:.3f}
Within R-squared:                {fe_results.rsquared_within:.3f}
Method:                 Least Squares   F-statistic:                     {model.fvalue:.1f}
Date:                {datetime.now():%a, %d %b %Y}   Prob (F-statistic):               {model.f_pvalue:.2e}
Time:                        {datetime.now():%H:%M:%S}   Log-Likelihood:                 {model.llf:.2f}
No. Observations:                {model.nobs:.0f}   AIC:                            {model.aic:.1f}
Df Residuals:                    {model.df_resid:.0f}   BIC:                             {model.bic:.1f}
Df Model:                         {model.df_model:.0f}                                         
Covariance Type:               cluster                             
==============================================================================
"""

# Combine the OLS info with the filtered summary
final_summary6 = ols_info + "\n" + filtered_summary6.to_string()

# Print the final summary
print(final_summary6)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2586232414.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['pre_pre_interaction'] = data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2586232414.py:8: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2586232414.py:9: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2586232414.py:11: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2586232414.py:12: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2586232414.py:13: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2586232414.py:14: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['year_geoid'] = data['year'] * data['geoid']
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       0.743
Model:                            OLS   Adj. R-squared:                  0.695
Within R-squared:                0.325
Method:                 Least Squares   F-statistic:                     -34298358.6
Date:                Fri, 15 Nov 2024   Prob (F-statistic):               1.00e+00
Time:                        11:19:15   Log-Likelihood:                 5684.24
No. Observations:                10075   AIC:                            -8194.5
Df Residuals:                    8488   BIC:                             3260.2
Df Model:                         1586                                         
Covariance Type:               cluster                             
==============================================================================

                                  Coef.      Std.Err.          z         P>|z|        [0.025        0.975]
Intercept                 -4.426475e+01  2.442425e+00 -18.123281  2.087673e-73 -4.905181e+01 -3.947768e+01
lnfer                     -9.289319e-02  4.463449e-02  -2.081198  3.741583e-02 -1.803752e-01 -5.411199e-03
tmp                        2.948238e-01  2.382286e-02  12.375668  3.539384e-35  2.481319e-01  3.415158e-01
tmp_tmp_interaction       -7.614167e-03  6.208992e-04 -12.263129  1.428749e-34 -8.831107e-03 -6.397227e-03
pre                        1.507325e-01  2.264502e-02   6.656321  2.807658e-11  1.063491e-01  1.951159e-01
pre_pre_interaction       -9.830327e-03  1.661822e-03  -5.915393  3.310851e-09 -1.308744e-02 -6.573217e-03
year                       2.457080e-02  1.256239e-03  19.559016  3.456824e-85  2.210861e-02  2.703298e-02
year_geoid                -1.947294e-07  3.728484e-08  -5.222749  1.762859e-07 -2.678063e-07 -1.216524e-07
lnfer_pre_interaction      2.398136e-02  1.284979e-02   1.866284  6.200158e-02 -1.203762e-03  4.916648e-02
lnfer_pre_pre_interaction -1.254279e-03  8.938748e-04  -1.403193  1.605592e-01 -3.006242e-03  4.976832e-04
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1587, but rank is 9
  warnings.warn('covariance of constraints does not have full '
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2586232414.py:30: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1['trend']=data1['year']
In [19]:
#model 7
# Irrigated counties: yield response with fertilizer x irrigation interactions on
# both temperature and precipitation, county fixed effects via C(geoid), SEs
# clustered by geoid.

# Filter data for irrigated (group == 1).
# .copy() makes `data` an independent frame so the column assignments below are
# well-defined and no longer raise SettingWithCopyWarning.
data = df_soybean[df_soybean['group'] == 1].copy()

# Prepare the squared and interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']

data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
# NOTE(review): this multiplies year by the numeric county code; if county-specific
# trends were intended, confirm this is the desired specification.
data['year_geoid'] = data['year'] * data['geoid']

# Define the formula for the regression model
formula = 'lnyield ~ lnfer + irrigation1 + irrigation12 + tmp + tmp_tmp_interaction +year+ year_geoid+ lnfer_irrigation1_tmp_interaction+ lnfer_irrigation12_tmp_tmp_interaction+lnfer_irrigation1_pre_interaction+ lnfer_irrigation12_pre_pre_interaction+ pre + pre_pre_interaction + C(geoid)'

# Run the regression using the formula interface with clustered standard errors
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})

# Extract the coefficient table
summary = model.summary2().tables[1]

# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary7 = summary.loc[~summary.index.str.contains(pattern)]

# Baseline model with county fixed effects only; its residual sum of squares is
# the within-county variation that the regressors could explain.
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()

# Within total sum of squares: residual SS of the fixed-effects-only model
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)

# Residual sum of squares (RSS) from the main model
rss_within = np.sum((model.resid) ** 2)

# Calculate the within R-squared
within_r2 = 1 - (rss_within / tss_within)

# Format the OLS summary with the within R-squared.
# NOTE: with cluster-robust covariance and one dummy per geoid, statsmodels warns
# that the constraint covariance is rank-deficient, so model.fvalue / f_pvalue
# printed below are not reliable and should not be interpreted.
ols_info = f"""
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       {model.rsquared:.3f}
Model:                            OLS   Adj. R-squared:                  {model.rsquared_adj:.3f}
Within R-squared:                 {within_r2:.3f}
Method:                 Least Squares   F-statistic:                     {model.fvalue:.1f}
Date:                {datetime.now():%a, %d %b %Y}   Prob (F-statistic):               {model.f_pvalue:.2e}
Time:                        {datetime.now():%H:%M:%S}   Log-Likelihood:                 {model.llf:.2f}
No. Observations:                {model.nobs:.0f}   AIC:                            {model.aic:.1f}
Df Residuals:                    {model.df_resid:.0f}   BIC:                             {model.bic:.1f}
Df Model:                         {model.df_model:.0f}                                         
Covariance Type:               cluster                             
==============================================================================
"""

# Combine the OLS info with the filtered summary
final_summary7 = ols_info + "\n" + filtered_summary7.to_string()

# Print the final summary
print(final_summary7)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2551732332.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['pre_pre_interaction'] = data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2551732332.py:8: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['irrigation12'] = data['irrigation1'] * data['irrigation1']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2551732332.py:9: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2551732332.py:10: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2551732332.py:12: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2551732332.py:13: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2551732332.py:14: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/2551732332.py:15: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['year_geoid'] = data['year'] * data['geoid']
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1262, but rank is 13
  warnings.warn('covariance of constraints does not have full '
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       0.739
Model:                            OLS   Adj. R-squared:                  0.686
Within R-squared:                 0.319
Method:                 Least Squares   F-statistic:                     -111784034.6
Date:                Fri, 15 Nov 2024   Prob (F-statistic):               1.00e+00
Time:                        11:19:20   Log-Likelihood:                 3554.34
No. Observations:                7507   AIC:                            -4582.7
Df Residuals:                    6244   BIC:                             4161.8
Df Model:                         1262                                         
Covariance Type:               cluster                             
==============================================================================

                                               Coef.      Std.Err.          z         P>|z|        [0.025        0.975]
Intercept                              -3.999831e+01  3.516756e+00 -11.373639  5.657834e-30 -4.689103e+01 -3.310560e+01
lnfer                                   1.125907e-02  9.948052e-03   1.131786  2.577243e-01 -8.238755e-03  3.075689e-02
irrigation1                             2.609614e-02  4.440535e-02   0.587680  5.567470e-01 -6.093674e-02  1.131290e-01
irrigation12                           -2.465586e-03  6.676493e-03  -0.369294  7.119090e-01 -1.555127e-02  1.062010e-02
tmp                                     5.379418e-01  3.513004e-02  15.312872  6.273192e-53  4.690882e-01  6.067954e-01
tmp_tmp_interaction                    -1.374014e-02  8.423301e-04 -16.312058  8.101710e-60 -1.539107e-02 -1.208920e-02
year                                    2.077213e-02  1.798938e-03  11.546882  7.654774e-31  1.724627e-02  2.429798e-02
year_geoid                             -3.051634e-08  5.854957e-08  -0.521205  6.022238e-01 -1.452714e-07  8.423870e-08
lnfer_irrigation1_tmp_interaction       2.071831e-03  1.269940e-03   1.631440  1.027975e-01 -4.172060e-04  4.560868e-03
lnfer_irrigation12_tmp_tmp_interaction -2.118492e-06  5.544439e-06  -0.382093  7.023923e-01 -1.298539e-05  8.748409e-06
lnfer_irrigation1_pre_interaction      -1.262525e-02  3.683210e-03  -3.427785  6.085279e-04 -1.984421e-02 -5.406292e-03
lnfer_irrigation12_pre_pre_interaction  1.195169e-04  4.084788e-05   2.925902  3.434588e-03  3.945653e-05  1.995773e-04
pre                                     2.326726e-01  1.622412e-02  14.341152  1.210295e-46  2.008739e-01  2.644713e-01
pre_pre_interaction                    -1.444476e-02  1.154386e-03 -12.512941  6.343126e-36 -1.670732e-02 -1.218221e-02
In [20]:
#model 8
# Fits the fertilizer x temperature and fertilizer x precipitation interaction
# model on the group == 2 subset, then prints a hand-built summary header
# (including the within R-squared from PanelOLS) followed by the coefficient
# table with the county dummies removed.

# Filter data for rainfed 
# .copy() makes `data` an independent frame, so the column assignments below
# are unambiguous and no longer raise SettingWithCopyWarning.
data = df_soybean[df_soybean['group'] == 2].copy()

# Prepare the interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']

data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
data['year_geoid'] = data['year'] * data['geoid']

# Define the formula for the regression model
formula = 'lnyield ~ lnfer  + tmp + tmp_tmp_interaction + pre +pre_pre_interaction+year+ year_geoid+ lnfer_tmp_interaction+lnfer_tmp_tmp_interaction +lnfer_pre_interaction+ lnfer_pre_pre_interaction + C(geoid)'

# Run the regression using the formula interface with clustered standard errors
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})

# Extract the summary table
summary = model.summary2().tables[1]

# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary8 = summary.loc[~summary.index.str.contains(pattern)]

# The panel formula refers to the time regressor as `trend` because `year`
# becomes part of the (geoid, year) index below.
data['trend'] = data['year']
data1 = data.set_index(['geoid','year'])
# Define the formula for the fixed effects regression model with PanelOLS
formula_fe = (
    'lnyield ~ lnfer  + tmp + tmp_tmp_interaction + pre +pre_pre_interaction+trend+ year_geoid+ lnfer_tmp_interaction+lnfer_tmp_tmp_interaction +lnfer_pre_interaction+ lnfer_pre_pre_interaction + EntityEffects '
)
# Fit the fixed effects model using PanelOLS
# Only fe_results.rsquared_within is read from this fit; every coefficient
# and standard error reported below comes from the OLS `model` above.
fe_model = PanelOLS.from_formula(formula_fe, data=data1)
fe_results = fe_model.fit(cov_type='robust')

# Format the OLS summary with the within R-squared
ols_info = f"""
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       {model.rsquared:.3f}
Model:                            OLS   Adj. R-squared:                  {model.rsquared_adj:.3f}
Within R-squared:               {fe_results.rsquared_within:.3f}
Method:                 Least Squares   F-statistic:                     {model.fvalue:.1f}
Date:                {datetime.now():%a, %d %b %Y}   Prob (F-statistic):               {model.f_pvalue:.2e}
Time:                        {datetime.now():%H:%M:%S}   Log-Likelihood:                 {model.llf:.2f}
No. Observations:                {model.nobs:.0f}   AIC:                            {model.aic:.1f}
Df Residuals:                    {model.df_resid:.0f}   BIC:                             {model.bic:.1f}
Df Model:                         {model.df_model:.0f}                                         
Covariance Type:               cluster                             
==============================================================================
"""

# Combine the OLS info with the filtered summary
final_summary8 = ols_info + "\n" + filtered_summary8.to_string()

# Print the final summary
print(final_summary8)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3559849126.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['pre_pre_interaction'] = data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3559849126.py:8: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3559849126.py:9: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3559849126.py:11: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3559849126.py:12: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3559849126.py:13: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3559849126.py:14: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['year_geoid'] = data['year'] * data['geoid']
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               lnyield   R-squared:                       0.749
Model:                            OLS   Adj. R-squared:                  0.702
Within R-squared:               0.341
Method:                 Least Squares   F-statistic:                     31074990.5
Date:                Fri, 15 Nov 2024   Prob (F-statistic):               0.00e+00
Time:                        11:19:26   Log-Likelihood:                 5801.49
No. Observations:                10075   AIC:                            -8425.0
Df Residuals:                    8486   BIC:                             3044.1
Df Model:                         1588                                         
Covariance Type:               cluster                             
==============================================================================

                                  Coef.      Std.Err.          z         P>|z|        [0.025        0.975]
Intercept                 -4.002823e+01  2.438288e+00 -16.416530  1.456612e-60 -4.480719e+01 -3.524927e+01
lnfer                     -1.357485e+00  2.308549e-01  -5.880252  4.096416e-09 -1.809952e+00 -9.050177e-01
tmp                        1.784050e-01  4.987598e-02   3.576971  3.475981e-04  8.064984e-02  2.761601e-01
tmp_tmp_interaction       -5.489017e-03  1.301083e-03  -4.218805  2.456000e-05 -8.039093e-03 -2.938941e-03
pre                        1.378057e-01  2.232611e-02   6.172401  6.726077e-10  9.404731e-02  1.815640e-01
pre_pre_interaction       -8.837721e-03  1.645483e-03  -5.370898  7.834573e-08 -1.206281e-02 -5.612633e-03
year                       2.316980e-02  1.224421e-03  18.923059  7.365373e-80  2.076998e-02  2.556962e-02
year_geoid                -1.393738e-07  3.716933e-08  -3.749700  1.770463e-04 -2.122244e-07 -6.652328e-08
lnfer_tmp_interaction      1.035174e-01  2.225610e-02   4.651192  3.300223e-06  5.989624e-02  1.471385e-01
lnfer_tmp_tmp_interaction -2.014753e-03  5.551371e-04  -3.629290  2.842020e-04 -3.102802e-03 -9.267047e-04
lnfer_pre_interaction      2.776295e-02  1.254637e-02   2.212827  2.690960e-02  3.172509e-03  5.235339e-02
lnfer_pre_pre_interaction -1.569957e-03  8.749615e-04  -1.794315  7.276285e-02 -3.284850e-03  1.449362e-04
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1589, but rank is 11
  warnings.warn('covariance of constraints does not have full '
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_21901/3559849126.py:31: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1['trend']=data1['year']
In [21]:
# Accumulator for the per-model result tables; starts empty and is filled
# further down in this cell under 'Model_<n>' keys.
results_dict = dict()
def add_significance_asterisks(coef, p_value):
    """Return the coefficient rounded to 3 decimals with significance stars.

    Stars are appended by p-value threshold (bounds inclusive):
    ``***`` for p <= 0.01, ``**`` for p <= 0.05, ``*`` for p <= 0.1.
    Any other p-value (including NaN, which fails every comparison) yields
    the rounded coefficient with no stars.

    Parameters
    ----------
    coef : number
        Coefficient estimate.
    p_value : number
        p-value associated with the coefficient.

    Returns
    -------
    str
        The formatted coefficient, e.g. ``"0.123***"``.
    """
    shown = round(coef, 3)
    # Ordered from strictest to loosest so the first match wins.
    star_levels = ((0.01, "***"), (0.05, "**"), (0.1, "*"))
    for cutoff, stars in star_levels:
        if p_value <= cutoff:
            return f"{shown}{stars}"
    return f"{shown}"

# Define a function to process each final_summary DataFrame
def process_summary(summary, model_name):
    """Reshape a coefficient table into a long (Variable, Metric) -> Value table.

    For every row of ``summary`` three entries are emitted, in this order:
    ``'Coef.'`` (coefficient with significance stars), ``'Std.Err.'``
    (standard error rounded to 3 decimals, in parentheses) and ``'P>|z|'``
    (p-value rounded to 3 decimals).

    Parameters
    ----------
    summary : pandas.DataFrame
        Table indexed by term name that provides the columns ``'Coef.'``,
        ``'Std.Err.'`` and ``'P>|z|'``.
    model_name : str
        Accepted for the caller's convenience; not used inside the function.

    Returns
    -------
    pandas.DataFrame
        Single ``'Value'`` column indexed by ``['Variable', 'Metric']``.
    """
    records = []
    for term, stats in summary.iterrows():
        starred_coef = add_significance_asterisks(stats['Coef.'], stats['P>|z|'])
        bracketed_se = f"({round(stats['Std.Err.'], 3)})"
        rounded_p = round(stats['P>|z|'], 3)
        # Three stacked entries per term: estimate, its standard error, its p-value.
        records.extend([
            (term, 'Coef.', starred_coef),
            (term, 'Std.Err.', bracketed_se),
            (term, 'P>|z|', rounded_p),
        ])

    long_table = pd.DataFrame(records, columns=['Variable', 'Metric', 'Value'])
    return long_table.set_index(['Variable', 'Metric'])

# Coefficient tables (county dummies already removed) for models 1-8, in order
filtered_summaries = [filtered_summary1, filtered_summary2, filtered_summary3, filtered_summary4, 
                   filtered_summary5, filtered_summary6, filtered_summary7, filtered_summary8]

# Reshape each table and store it under its 'Model_<n>' label
for i, summary in enumerate(filtered_summaries, start=1):
    results_dict[f'Model_{i}'] = process_summary(summary, f'Model_{i}')

# Concatenate all model results along columns (one column group per model;
# terms absent from a model show up as NaN)
final_results = pd.concat(results_dict, axis=1)

# Export to Excel
# A single path variable feeds both the write and the confirmation message,
# so the message always names the file that was actually written.
output_path = "model_summary_table14.xlsx"
final_results.to_excel(output_path)
print(final_results)
print(f"Model summary statistics with significance have been saved to {output_path}")
                                                    Model_1    Model_2  \
                                                      Value      Value   
Variable                               Metric                            
Intercept                              Coef.     -39.937***  -46.16***   
                                       Std.Err.     (3.533)    (2.443)   
                                       P>|z|            0.0        0.0   
lnfer                                  Coef.          0.006      0.003   
                                       Std.Err.      (0.01)    (0.008)   
                                       P>|z|          0.536      0.668   
irrigation1                            Coef.         -0.016        NaN   
                                       Std.Err.     (0.016)        NaN   
                                       P>|z|          0.331        NaN   
irrigation12                           Coef.        0.004**        NaN   
                                       Std.Err.     (0.002)        NaN   
                                       P>|z|          0.044        NaN   
tmp                                    Coef.        0.52***   0.295***   
                                       Std.Err.     (0.035)    (0.024)   
                                       P>|z|            0.0        0.0   
tmp_tmp_interaction                    Coef.      -0.013***  -0.008***   
                                       Std.Err.     (0.001)    (0.001)   
                                       P>|z|            0.0        0.0   
pre                                    Coef.       0.227***   0.179***   
                                       Std.Err.     (0.016)    (0.012)   
                                       P>|z|            0.0        0.0   
pre_pre_interaction                    Coef.      -0.014***  -0.011***   
                                       Std.Err.     (0.001)    (0.001)   
                                       P>|z|            0.0        0.0   
year                                   Coef.       0.021***   0.025***   
                                       Std.Err.     (0.002)    (0.001)   
                                       P>|z|            0.0        0.0   
year_geoid                             Coef.           -0.0    -0.0***   
                                       Std.Err.       (0.0)      (0.0)   
                                       P>|z|          0.651        0.0   
lnfer_irrigation1_tmp_interaction      Coef.            NaN        NaN   
                                       Std.Err.         NaN        NaN   
                                       P>|z|            NaN        NaN   
lnfer_irrigation12_tmp_tmp_interaction Coef.            NaN        NaN   
                                       Std.Err.         NaN        NaN   
                                       P>|z|            NaN        NaN   
lnfer_tmp_interaction                  Coef.            NaN        NaN   
                                       Std.Err.         NaN        NaN   
                                       P>|z|            NaN        NaN   
lnfer_tmp_tmp_interaction              Coef.            NaN        NaN   
                                       Std.Err.         NaN        NaN   
                                       P>|z|            NaN        NaN   
lnfer_irrigation1_pre_interaction      Coef.            NaN        NaN   
                                       Std.Err.         NaN        NaN   
                                       P>|z|            NaN        NaN   
lnfer_irrigation12_pre_pre_interaction Coef.            NaN        NaN   
                                       Std.Err.         NaN        NaN   
                                       P>|z|            NaN        NaN   
lnfer_pre_interaction                  Coef.            NaN        NaN   
                                       Std.Err.         NaN        NaN   
                                       P>|z|            NaN        NaN   
lnfer_pre_pre_interaction              Coef.            NaN        NaN   
                                       Std.Err.         NaN        NaN   
                                       P>|z|            NaN        NaN   

                                                    Model_3    Model_4  \
                                                      Value      Value   
Variable                               Metric                            
Intercept                              Coef.     -40.172***  -41.59***   
                                       Std.Err.     (3.525)    (2.428)   
                                       P>|z|            0.0        0.0   
lnfer                                  Coef.          0.011  -1.348***   
                                       Std.Err.      (0.01)    (0.225)   
                                       P>|z|          0.262        0.0   
irrigation1                            Coef.          0.052        NaN   
                                       Std.Err.     (0.045)        NaN   
                                       P>|z|          0.248        NaN   
irrigation12                           Coef.         -0.006        NaN   
                                       Std.Err.     (0.007)        NaN   
                                       P>|z|          0.426        NaN   
tmp                                    Coef.       0.519***   0.161***   
                                       Std.Err.     (0.035)     (0.05)   
                                       P>|z|            0.0      0.001   
tmp_tmp_interaction                    Coef.      -0.013***  -0.005***   
                                       Std.Err.     (0.001)    (0.001)   
                                       P>|z|            0.0        0.0   
pre                                    Coef.       0.228***   0.172***   
                                       Std.Err.     (0.016)    (0.012)   
                                       P>|z|            0.0        0.0   
pre_pre_interaction                    Coef.      -0.014***  -0.011***   
                                       Std.Err.     (0.001)    (0.001)   
                                       P>|z|            0.0        0.0   
year                                   Coef.       0.021***   0.024***   
                                       Std.Err.     (0.002)    (0.001)   
                                       P>|z|            0.0        0.0   
year_geoid                             Coef.           -0.0    -0.0***   
                                       Std.Err.       (0.0)      (0.0)   
                                       P>|z|          0.553        0.0   
lnfer_irrigation1_tmp_interaction      Coef.        -0.001*        NaN   
                                       Std.Err.     (0.001)        NaN   
                                       P>|z|          0.091        NaN   
lnfer_irrigation12_tmp_tmp_interaction Coef.            0.0        NaN   
                                       Std.Err.       (0.0)        NaN   
                                       P>|z|          0.166        NaN   
lnfer_tmp_interaction                  Coef.            NaN   0.113***   
                                       Std.Err.         NaN    (0.023)   
                                       P>|z|            NaN        0.0   
lnfer_tmp_tmp_interaction              Coef.            NaN  -0.002***   
                                       Std.Err.         NaN    (0.001)   
                                       P>|z|            NaN        0.0   
lnfer_irrigation1_pre_interaction      Coef.            NaN        NaN   
                                       Std.Err.         NaN        NaN   
                                       P>|z|            NaN        NaN   
lnfer_irrigation12_pre_pre_interaction Coef.            NaN        NaN   
                                       Std.Err.         NaN        NaN   
                                       P>|z|            NaN        NaN   
lnfer_pre_interaction                  Coef.            NaN        NaN   
                                       Std.Err.         NaN        NaN   
                                       P>|z|            NaN        NaN   
lnfer_pre_pre_interaction              Coef.            NaN        NaN   
                                       Std.Err.         NaN        NaN   
                                       P>|z|            NaN        NaN   

                                                    Model_5     Model_6  \
                                                      Value       Value   
Variable                               Metric                             
Intercept                              Coef.     -40.255***  -44.265***   
                                       Std.Err.      (3.53)     (2.442)   
                                       P>|z|            0.0         0.0   
lnfer                                  Coef.          0.013    -0.093**   
                                       Std.Err.      (0.01)     (0.045)   
                                       P>|z|          0.194       0.037   
irrigation1                            Coef.         0.052*         NaN   
                                       Std.Err.     (0.027)         NaN   
                                       P>|z|          0.057         NaN   
irrigation12                           Coef.         -0.001         NaN   
                                       Std.Err.     (0.002)         NaN   
                                       P>|z|          0.756         NaN   
tmp                                    Coef.       0.526***    0.295***   
                                       Std.Err.     (0.035)     (0.024)   
                                       P>|z|            0.0         0.0   
tmp_tmp_interaction                    Coef.      -0.013***   -0.008***   
                                       Std.Err.     (0.001)     (0.001)   
                                       P>|z|            0.0         0.0   
pre                                    Coef.       0.231***    0.151***   
                                       Std.Err.     (0.016)     (0.023)   
                                       P>|z|            0.0         0.0   
pre_pre_interaction                    Coef.      -0.014***    -0.01***   
                                       Std.Err.     (0.001)     (0.002)   
                                       P>|z|            0.0         0.0   
year                                   Coef.       0.021***    0.025***   
                                       Std.Err.     (0.002)     (0.001)   
                                       P>|z|            0.0         0.0   
year_geoid                             Coef.           -0.0     -0.0***   
                                       Std.Err.       (0.0)       (0.0)   
                                       P>|z|          0.534         0.0   
lnfer_irrigation1_tmp_interaction      Coef.            NaN         NaN   
                                       Std.Err.         NaN         NaN   
                                       P>|z|            NaN         NaN   
lnfer_irrigation12_tmp_tmp_interaction Coef.            NaN         NaN   
                                       Std.Err.         NaN         NaN   
                                       P>|z|            NaN         NaN   
lnfer_tmp_interaction                  Coef.            NaN         NaN   
                                       Std.Err.         NaN         NaN   
                                       P>|z|            NaN         NaN   
lnfer_tmp_tmp_interaction              Coef.            NaN         NaN   
                                       Std.Err.         NaN         NaN   
                                       P>|z|            NaN         NaN   
lnfer_irrigation1_pre_interaction      Coef.      -0.006***         NaN   
                                       Std.Err.     (0.002)         NaN   
                                       P>|z|          0.002         NaN   
lnfer_irrigation12_pre_pre_interaction Coef.          0.0**         NaN   
                                       Std.Err.       (0.0)         NaN   
                                       P>|z|          0.035         NaN   
lnfer_pre_interaction                  Coef.            NaN      0.024*   
                                       Std.Err.         NaN     (0.013)   
                                       P>|z|            NaN       0.062   
lnfer_pre_pre_interaction              Coef.            NaN      -0.001   
                                       Std.Err.         NaN     (0.001)   
                                       P>|z|            NaN       0.161   

                                                    Model_7     Model_8  
                                                      Value       Value  
Variable                               Metric                            
Intercept                              Coef.     -39.998***  -40.028***  
                                       Std.Err.     (3.517)     (2.438)  
                                       P>|z|            0.0         0.0  
lnfer                                  Coef.          0.011   -1.357***  
                                       Std.Err.      (0.01)     (0.231)  
                                       P>|z|          0.258         0.0  
irrigation1                            Coef.          0.026         NaN  
                                       Std.Err.     (0.044)         NaN  
                                       P>|z|          0.557         NaN  
irrigation12                           Coef.         -0.002         NaN  
                                       Std.Err.     (0.007)         NaN  
                                       P>|z|          0.712         NaN  
tmp                                    Coef.       0.538***    0.178***  
                                       Std.Err.     (0.035)      (0.05)  
                                       P>|z|            0.0         0.0  
tmp_tmp_interaction                    Coef.      -0.014***   -0.005***  
                                       Std.Err.     (0.001)     (0.001)  
                                       P>|z|            0.0         0.0  
pre                                    Coef.       0.233***    0.138***  
                                       Std.Err.     (0.016)     (0.022)  
                                       P>|z|            0.0         0.0  
pre_pre_interaction                    Coef.      -0.014***   -0.009***  
                                       Std.Err.     (0.001)     (0.002)  
                                       P>|z|            0.0         0.0  
year                                   Coef.       0.021***    0.023***  
                                       Std.Err.     (0.002)     (0.001)  
                                       P>|z|            0.0         0.0  
year_geoid                             Coef.           -0.0     -0.0***  
                                       Std.Err.       (0.0)       (0.0)  
                                       P>|z|          0.602         0.0  
lnfer_irrigation1_tmp_interaction      Coef.          0.002         NaN  
                                       Std.Err.     (0.001)         NaN  
                                       P>|z|          0.103         NaN  
lnfer_irrigation12_tmp_tmp_interaction Coef.           -0.0         NaN  
                                       Std.Err.       (0.0)         NaN  
                                       P>|z|          0.702         NaN  
lnfer_tmp_interaction                  Coef.            NaN    0.104***  
                                       Std.Err.         NaN     (0.022)  
                                       P>|z|            NaN         0.0  
lnfer_tmp_tmp_interaction              Coef.            NaN   -0.002***  
                                       Std.Err.         NaN     (0.001)  
                                       P>|z|            NaN         0.0  
lnfer_irrigation1_pre_interaction      Coef.      -0.013***         NaN  
                                       Std.Err.     (0.004)         NaN  
                                       P>|z|          0.001         NaN  
lnfer_irrigation12_pre_pre_interaction Coef.         0.0***         NaN  
                                       Std.Err.       (0.0)         NaN  
                                       P>|z|          0.003         NaN  
lnfer_pre_interaction                  Coef.            NaN     0.028**  
                                       Std.Err.         NaN     (0.013)  
                                       P>|z|            NaN       0.027  
lnfer_pre_pre_interaction              Coef.            NaN     -0.002*  
                                       Std.Err.         NaN     (0.001)  
                                       P>|z|            NaN       0.073  
Model summary statistics with significance have been saved to model_summary_statistics_with_significance.xlsx
In [ ]: