import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.pyplot as plt
import netCDF4 as nc
import pandas as pd
import os
import csv
from glob import glob
import xarray as xr
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import re
from datetime import datetime
# Step 1: Data Preparation
# Load your dataset into a pandas DataFrame
os.chdir('/Users/chenchenren/postdoc/paper/2N and water-US/regression/')
# Filter the dataframe for rows where crop is 'corn'
# Path to the Excel file
excel_file = "./corn_reg_data.csv"
# Read the specific sheet from the Excel file
ds = pd.read_csv(excel_file)
df_corn =ds
print(df_corn)
geoid year tmp pre irrigation1 lnyield lnfer \ 0 1001.0 2011.0 22.514565 7.140138 0.502789 8.849908 6.394569 1 1001.0 2013.0 21.656385 10.242795 0.002722 8.737623 6.622513 2 1001.0 2017.0 22.842094 11.152807 0.018559 9.279970 6.419519 3 1001.0 2018.0 22.934221 10.243298 1.663942 9.249052 6.429010 4 1001.0 2019.0 23.288399 8.607845 0.354742 9.249052 6.419507 ... ... ... ... ... ... ... ... 19939 56031.0 2011.0 12.459663 3.571071 0.000000 9.105812 6.221997 19940 56031.0 2014.0 12.139701 3.237197 0.000000 9.166642 6.244471 19941 56043.0 2008.0 12.025408 2.338907 0.000000 9.182902 6.500314 19942 56043.0 2011.0 12.087538 2.497200 0.000000 9.025305 6.809824 19943 56043.0 2013.0 12.322425 2.396772 0.000000 9.164016 6.837906 group zone 0 1 5.0 1 1 5.0 2 1 5.0 3 1 5.0 4 1 5.0 ... ... ... 19939 2 1.0 19940 2 1.0 19941 2 1.0 19942 2 1.0 19943 2 1.0 [19944 rows x 9 columns]
excel_file = "./soybean_reg_data.csv"
# Read the specific sheet from the Excel file
ds = pd.read_csv(excel_file)
df_soybean = ds
print(df_soybean)
geoid year tmp pre irrigation1 lnyield lnfer \ 0 1001.0 2009.0 24.140660 8.808839 0.718851 7.609614 2.022537 1 1001.0 2010.0 25.675484 5.438990 0.830087 7.035731 1.910005 2 1001.0 2011.0 24.544823 5.270162 0.595086 7.466514 2.025641 3 1001.0 2013.0 24.036592 7.514143 0.001241 7.926855 2.906756 4 1003.0 2008.0 24.727230 8.675528 0.001980 7.934111 1.240602 ... ... ... ... ... ... ... ... 17577 55141.0 2016.0 17.486904 7.304833 0.000000 8.134343 2.860158 17578 55141.0 2017.0 16.590237 6.467205 0.000000 7.978877 2.633171 17579 55141.0 2018.0 17.266659 8.373576 0.000000 7.838077 2.639034 17580 55141.0 2019.0 16.238592 8.735889 0.000000 7.912185 2.671287 17581 55141.0 2020.0 16.202635 6.832954 0.000000 8.173033 2.625827 group zone 0 1 5.0 1 1 5.0 2 1 5.0 3 1 5.0 4 1 5.0 ... ... ... 17577 2 1.0 17578 2 1.0 17579 2 1.0 17580 2 1.0 17581 2 1.0 [17582 rows x 9 columns]
# Filter data for irrigated corn
data = df_corn[df_corn['group'] == 1]
# Prepare the interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
# Define the formula for the regression model
formula = 'lnyield ~ lnfer + irrigation1 + irrigation12 + tmp + tmp_tmp_interaction + pre + pre_pre_interaction + C(geoid)'
# Run the regression using the formula interface with clustered standard errors
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the summary table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary = summary.loc[~summary.index.str.contains(pattern)]
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()
# Calculate the total sum of squares (TSS) using the model with only fixed effects
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)
# Calculate the residual sum of squares (RSS) from your main model
rss_within = np.sum((model.resid) ** 2)
# Calculate the within R-squared
within_r2 = 1 - (rss_within / tss_within)
# Format the OLS summary with the within R-squared
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {within_r2:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary = ols_info + "\n" + filtered_summary.to_string()
# Print the final summary
print(final_summary)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/1274286391.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/1274286391.py:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['irrigation12'] = data['irrigation1'] * data['irrigation1'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/1274286391.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/1274286391.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/1274286391.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/1274286391.py:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/1274286391.py:12: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp'] /Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1888: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1508, but rank is 7 warnings.warn('covariance of constraints does not have full '
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.645 Model: OLS Adj. R-squared: 0.589 Within R-squared: 0.177 Method: Least Squares F-statistic: 3255195358504.2 Date: Thu, 29 Aug 2024 Prob (F-statistic): 0.00e+00 Time: 14:46:50 Log-Likelihood: 2533.88 No. Observations: 11044 AIC: -2049.8 Df Residuals: 9535 BIC: 8980.5 Df Model: 1508 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept 5.111854 0.307692 16.613552 5.560121e-62 4.508789 5.714919 lnfer 0.460742 0.040436 11.394350 4.461466e-30 0.381489 0.539995 irrigation1 0.033407 0.007911 4.222728 2.413634e-05 0.017901 0.048913 irrigation12 -0.001540 0.000807 -1.909466 5.620195e-02 -0.003121 0.000041 tmp 0.013745 0.022950 0.598910 5.492332e-01 -0.031237 0.058727 tmp_tmp_interaction -0.000760 0.000673 -1.129258 2.587892e-01 -0.002079 0.000559 pre 0.188568 0.012528 15.051895 3.354859e-51 0.164014 0.213122 pre_pre_interaction -0.007849 0.000711 -11.044381 2.333717e-28 -0.009242 -0.006456
# Extract the coefficients for tmp and tmp_tmp_interaction
coeff_tmp = model.params['tmp']
coeff_tmp_tmp_interaction = model.params['tmp_tmp_interaction']
# Define the range of tmp based on the input data
tmp_range = np.linspace(data['tmp'].min(), data['tmp'].max(), 100)
# Calculate predicted lnyield using the quadratic form
lnyield_pred_tmp = coeff_tmp * tmp_range + coeff_tmp_tmp_interaction * (tmp_range ** 2)
# Calculate the standard errors and confidence intervals
# Get the covariance matrix of the parameters
cov_matrix = model.cov_params()
# Calculate the variance of the prediction
pred_var = (cov_matrix.loc['tmp', 'tmp'] * tmp_range**2 +
2 * cov_matrix.loc['tmp', 'tmp_tmp_interaction'] * tmp_range**3 +
cov_matrix.loc['tmp_tmp_interaction', 'tmp_tmp_interaction'] * tmp_range**4)
# Calculate the standard error
pred_se = np.sqrt(pred_var)
# Calculate the 95% confidence interval
ci_lower_tmp = lnyield_pred_tmp - 1.96 * pred_se
ci_upper_tmp = lnyield_pred_tmp + 1.96 * pred_se
# Plotting the results
plt.figure(figsize=(10, 6))
plt.plot(tmp_range, lnyield_pred_tmp, label='Predicted lnyield', color='blue')
plt.fill_between(tmp_range, ci_lower_tmp, ci_upper_tmp, color='blue', alpha=0.2, label='95% CI')
plt.xlabel('tmp')
plt.ylabel('lnyield')
plt.title('Nonlinear Relationship between tmp and lnyield')
plt.legend()
plt.show()
# Extract the coefficients for pre and pre_pre_interaction
coeff_pre = model.params['pre']
coeff_pre_pre_interaction = model.params['pre_pre_interaction']
# Define the range of pre based on the input data
pre_range = np.linspace(np.percentile(data['pre'],1), np.percentile(data['pre'],99), 100)
# Calculate predicted lnyield using the quadratic form
lnyield_pred_pre = coeff_pre * pre_range + coeff_pre_pre_interaction * (pre_range ** 2)-0.8
# Calculate the standard errors and confidence intervals
# Get the covariance matrix of the parameters
cov_matrix = model.cov_params()
# Calculate the variance of the prediction
pred_var = (cov_matrix.loc['pre', 'pre'] * pre_range**2 +
2 * cov_matrix.loc['pre', 'pre_pre_interaction'] * pre_range**3 +
cov_matrix.loc['pre_pre_interaction', 'pre_pre_interaction'] * pre_range**4)
# Calculate the standard error
pred_se = np.sqrt(pred_var)
# Calculate the 95% confidence interval
ci_lower_pre = lnyield_pred_pre - 1.96 * pred_se
ci_upper_pre = lnyield_pred_pre + 1.96 * pred_se
# Plotting the results
plt.figure(figsize=(10, 6))
plt.plot(pre_range, lnyield_pred_pre, label='Predicted lnyield', color='blue')
plt.fill_between(pre_range, ci_lower_pre, ci_upper_pre, color='blue', alpha=0.2, label='95% CI')
plt.xlabel('pre')
plt.ylabel('lnyield')
plt.title('Nonlinear Relationship between pre and lnyield')
plt.legend()
plt.show()
# Filter data for rainfed corn
data = df_corn[df_corn['group'] == 2]
# Prepare the interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
# Define the formula for the regression model
formula = 'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre + pre_pre_interaction + C(geoid)'
# Run the regression using the formula interface with clustered standard errors
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the summary table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary = summary.loc[~summary.index.str.contains(pattern)]
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()
# Calculate the total sum of squares (TSS) using the model with only fixed effects
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)
# Calculate the residual sum of squares (RSS) from your main model
rss_within = np.sum((model.resid) ** 2)
# Calculate the within R-squared
within_r2 = 1 - (rss_within / tss_within)
# Format the OLS summary with the within R-squared
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {within_r2:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary = ols_info + "\n" + filtered_summary.to_string()
# Print the final summary
print(final_summary)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/2811297337.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/2811297337.py:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_interaction'] = data['lnfer'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/2811297337.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/2811297337.py:9: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/2811297337.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/2811297337.py:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp'] /Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1888: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 1349, but rank is 5 warnings.warn('covariance of constraints does not have full '
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.679 Model: OLS Adj. R-squared: 0.622 Within R-squared: 0.213 Method: Least Squares F-statistic: 34780076720926.6 Date: Thu, 29 Aug 2024 Prob (F-statistic): 0.00e+00 Time: 14:46:58 Log-Likelihood: 1728.19 No. Observations: 8900 AIC: -756.4 Df Residuals: 7550 BIC: 8820.3 Df Model: 1349 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept 4.172842 0.286888 14.545210 6.263815e-48 3.610552 4.735131 lnfer 0.607814 0.046423 13.092929 3.613962e-39 0.516826 0.698802 tmp 0.147547 0.024813 5.946388 2.741238e-09 0.098915 0.196179 tmp_tmp_interaction -0.005172 0.000751 -6.886305 5.726009e-12 -0.006644 -0.003700 pre 0.166593 0.014075 11.835963 2.544069e-32 0.139006 0.194180 pre_pre_interaction -0.007199 0.000845 -8.516098 1.650193e-17 -0.008856 -0.005543
# Extract the coefficients for tmp and tmp_tmp_interaction
coeff_tmp = model.params['tmp']
coeff_tmp_tmp_interaction = model.params['tmp_tmp_interaction']
# Define the range of tmp based on the input data
tmp_range_rfd = np.linspace(data['tmp'].min(), data['tmp'].max(), 100)
# Calculate predicted lnyield using the quadratic form
lnyield_pred_tmp_rfd = coeff_tmp * tmp_range_rfd + coeff_tmp_tmp_interaction * (tmp_range_rfd ** 2)-0.7
# Calculate the standard errors and confidence intervals
# Get the covariance matrix of the parameters
cov_matrix = model.cov_params()
# Calculate the variance of the prediction
pred_var = (cov_matrix.loc['tmp', 'tmp'] * tmp_range_rfd**2 +
2 * cov_matrix.loc['tmp', 'tmp_tmp_interaction'] * tmp_range_rfd**3 +
cov_matrix.loc['tmp_tmp_interaction', 'tmp_tmp_interaction'] * tmp_range_rfd**4)
# Calculate the standard error
pred_se = np.sqrt(pred_var)
# Calculate the 95% confidence interval
ci_lower_tmp_rfd = lnyield_pred_tmp_rfd - 1.96 * pred_se
ci_upper_tmp_rfd = lnyield_pred_tmp_rfd + 1.96 * pred_se
# Plotting the results
plt.figure(figsize=(10, 6))
plt.plot(tmp_range_rfd, lnyield_pred_tmp_rfd, label='Predicted lnyield', color='blue')
plt.fill_between(tmp_range_rfd, ci_lower_tmp_rfd, ci_upper_tmp_rfd, color='blue', alpha=0.2, label='95% CI')
plt.xlabel('tmp')
plt.ylabel('lnyield')
plt.title('Nonlinear Relationship between tmp and lnyield')
plt.legend()
plt.show()
# Extract the coefficients for pre and pre_pre_interaction
coeff_pre = model.params['pre']
coeff_pre_pre_interaction = model.params['pre_pre_interaction']
# Define the range of pre based on the input data
pre_range_rfd = np.linspace(np.percentile(data['pre'],1), np.percentile(data['pre'],99), 100)
# Calculate predicted lnyield using the quadratic form
lnyield_pred_pre_rfd = coeff_pre * pre_range_rfd + coeff_pre_pre_interaction * (pre_range_rfd ** 2)-0.8
# Calculate the standard errors and confidence intervals
# Get the covariance matrix of the parameters
cov_matrix = model.cov_params()
# Calculate the variance of the prediction
pred_var = (cov_matrix.loc['pre', 'pre'] * pre_range_rfd**2 +
2 * cov_matrix.loc['pre', 'pre_pre_interaction'] * pre_range_rfd**3 +
cov_matrix.loc['pre_pre_interaction', 'pre_pre_interaction'] * pre_range_rfd**4)
# Calculate the standard error
pred_se = np.sqrt(pred_var)
# Calculate the 95% confidence interval
ci_lower_pre_rfd = lnyield_pred_pre_rfd - 1.96 * pred_se
ci_upper_pre_rfd = lnyield_pred_pre_rfd + 1.96 * pred_se
# Plotting the results
plt.figure(figsize=(10, 6))
plt.plot(pre_range_rfd, lnyield_pred_pre_rfd, label='Predicted lnyield', color='blue')
plt.fill_between(pre_range_rfd, ci_lower_pre_rfd, ci_upper_pre_rfd, color='blue', alpha=0.2, label='95% CI')
plt.xlabel('pre')
plt.ylabel('lnyield')
plt.title('Nonlinear Relationship between pre and lnyield')
plt.legend()
plt.show()
# Filter data for irrigated corn
data = df_corn[(df_corn['group'] == 1)&(df_corn['zone'] == 1)]
# Prepare the interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
# Define the formula for the regression model
formula = 'lnyield ~ lnfer + irrigation1 + irrigation12 + tmp + tmp_tmp_interaction + pre + pre_pre_interaction + C(geoid)'
# Run the regression using the formula interface with clustered standard errors
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the summary table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary = summary.loc[~summary.index.str.contains(pattern)]
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()
# Calculate the total sum of squares (TSS) using the model with only fixed effects
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)
# Calculate the residual sum of squares (RSS) from your main model
rss_within = np.sum((model.resid) ** 2)
# Calculate the within R-squared
within_r2 = 1 - (rss_within / tss_within)
# Format the OLS summary with the within R-squared
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {within_r2:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary = ols_info + "\n" + filtered_summary.to_string()
# Print the final summary
print(final_summary)
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.767 Model: OLS Adj. R-squared: 0.736 Within R-squared: 0.195 Method: Least Squares F-statistic: 106.8 Date: Thu, 29 Aug 2024 Prob (F-statistic): 2.33e-57 Time: 14:46:58 Log-Likelihood: 982.95 No. Observations: 1452 AIC: -1625.9 Df Residuals: 1282 BIC: -728.2 Df Model: 169 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept 3.052680 0.893944 3.414845 6.381827e-04 1.300582 4.804778 lnfer 0.276984 0.051838 5.343288 9.127539e-08 0.175384 0.378585 irrigation1 0.096729 0.032794 2.949605 3.181808e-03 0.032454 0.161005 irrigation12 -0.007572 0.003135 -2.415187 1.572714e-02 -0.013717 -0.001427 tmp 0.526457 0.109636 4.801871 1.571897e-06 0.311575 0.741339 tmp_tmp_interaction -0.017897 0.003878 -4.614738 3.935915e-06 -0.025499 -0.010296 pre 0.234201 0.042718 5.482543 4.192551e-08 0.150476 0.317926 pre_pre_interaction -0.014771 0.002948 -5.010080 5.440749e-07 -0.020550 -0.008993
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/2640714645.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/2640714645.py:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['irrigation12'] = data['irrigation1'] * data['irrigation1'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/2640714645.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/2640714645.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/2640714645.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/2640714645.py:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/2640714645.py:12: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp'] /Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1888: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 169, but rank is 7 warnings.warn('covariance of constraints does not have full '
# Extract the coefficients for tmp and tmp_tmp_interaction
coeff_tmp1 = model.params['tmp']
coeff_tmp_tmp_interaction1 = model.params['tmp_tmp_interaction']
# Define the range of tmp based on the input data
tmp_range1 = np.linspace(np.percentile(data['tmp'],5), np.percentile(data['tmp'],95), 100)
# Calculate predicted lnyield using the quadratic form
lnyield_pred_tmp1 = coeff_tmp1 * tmp_range1 + coeff_tmp_tmp_interaction1 * (tmp_range1 ** 2)-3.8
# Calculate the standard errors and confidence intervals
# Plotting the results
plt.figure(figsize=(10, 6))
plt.plot(tmp_range1, lnyield_pred_tmp1, label='Predicted lnyield', color='blue')
plt.xlabel('tmp')
plt.ylabel('lnyield')
plt.title('Nonlinear Relationship between tmp and lnyield')
plt.legend()
plt.show()
# Extract the coefficients for pre and pre_pre_interaction
coeff_pre1 = model.params['pre']
coeff_pre_pre_interaction1 = model.params['pre_pre_interaction']
# Define the range of pre based on the input data
pre_range1 = np.linspace(np.percentile(data['pre'],5), np.percentile(data['pre'],95), 100)
# Calculate predicted lnyield using the quadratic form
lnyield_pred_pre1 = coeff_pre1 * pre_range1 + coeff_pre_pre_interaction1 * (pre_range1 ** 2)-0.8
# Calculate the standard errors and confidence intervals
# Plotting the resultsplt.figure(figsize=(10, 6))
plt.plot(pre_range1, lnyield_pred_pre1, label='Predicted lnyield', color='blue')
plt.xlabel('pre')
plt.ylabel('lnyield')
plt.title('Nonlinear Relationship between pre and lnyield')
plt.legend()
plt.show()
# Filter data for irrigated corn
data = df_corn[(df_corn['group'] == 1)&(df_corn['zone'] == 2)]
# Prepare the interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
# Define the formula for the regression model
formula = 'lnyield ~ lnfer + irrigation1 + irrigation12 + tmp + tmp_tmp_interaction + pre + pre_pre_interaction + C(geoid)'
# Run the regression using the formula interface with clustered standard errors
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the summary table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary = summary.loc[~summary.index.str.contains(pattern)]
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()
# Calculate the total sum of squares (TSS) using the model with only fixed effects
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)
# Calculate the residual sum of squares (RSS) from your main model
rss_within = np.sum((model.resid) ** 2)
# Calculate the within R-squared
within_r2 = 1 - (rss_within / tss_within)
# Format the OLS summary with the within R-squared
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {within_r2:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary = ols_info + "\n" + filtered_summary.to_string()
# Print the final summary
print(final_summary)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/2701290221.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/2701290221.py:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['irrigation12'] = data['irrigation1'] * data['irrigation1'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/2701290221.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/2701290221.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/2701290221.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/2701290221.py:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/2701290221.py:12: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.770 Model: OLS Adj. R-squared: 0.731 Within R-squared: 0.291 Method: Least Squares F-statistic: -757983258881.0 Date: Thu, 29 Aug 2024 Prob (F-statistic): 1.00e+00 Time: 14:46:59 Log-Likelihood: 1684.51 No. Observations: 2657 AIC: -2605.0 Df Residuals: 2275 BIC: -357.0 Df Model: 381 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept 3.933547 0.565241 6.959065 3.425395e-12 2.825695 5.041398 lnfer 0.313007 0.063239 4.949596 7.436765e-07 0.189061 0.436952 irrigation1 0.024007 0.017712 1.355382 1.752960e-01 -0.010709 0.058722 irrigation12 -0.000612 0.001426 -0.429518 6.675460e-01 -0.003406 0.002182 tmp 0.275186 0.056512 4.869494 1.118842e-06 0.164424 0.385948 tmp_tmp_interaction -0.008927 0.001766 -5.056012 4.281136e-07 -0.012387 -0.005466 pre 0.335094 0.030346 11.042593 2.380621e-28 0.275618 0.394570 pre_pre_interaction -0.019440 0.001906 -10.200468 1.973184e-24 -0.023175 -0.015705
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1888: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 381, but rank is 7 warnings.warn('covariance of constraints does not have full '
# Extract the coefficients for tmp and tmp_tmp_interaction
coeff_tmp2 = model.params['tmp']
coeff_tmp_tmp_interaction2 = model.params['tmp_tmp_interaction']
# Define the range of tmp based on the input data
tmp_range2 = np.linspace(np.percentile(data['tmp'],5), np.percentile(data['tmp'],95), 100)
# Calculate predicted lnyield using the quadratic form
lnyield_pred_tmp2 = coeff_tmp2 * tmp_range2 + coeff_tmp_tmp_interaction2 * (tmp_range2 ** 2)-2.1
# Calculate the standard errors and confidence intervals
# Plotting the results
plt.figure(figsize=(10, 6))
plt.plot(tmp_range2, lnyield_pred_tmp2, label='Predicted lnyield', color='blue')
plt.xlabel('tmp')
plt.ylabel('lnyield')
plt.title('Nonlinear Relationship between tmp and lnyield')
plt.legend()
plt.show()
# Extract the coefficients for pre and pre_pre_interaction
coeff_pre2 = model.params['pre']
coeff_pre_pre_interaction2 = model.params['pre_pre_interaction']
# Define the range of pre based on the input data
pre_range2 = np.linspace(np.percentile(data['pre'],5), np.percentile(data['pre'],95), 100)
# Calculate predicted lnyield using the quadratic form
lnyield_pred_pre2 = coeff_pre2 * pre_range2 + coeff_pre_pre_interaction2 * (pre_range2 ** 2)-1.2
# Calculate the standard errors and confidence intervals
# Plotting the results
plt.figure(figsize=(10, 6))
plt.plot(pre_range2, lnyield_pred_pre2, label='Predicted lnyield', color='blue')
plt.xlabel('pre')
plt.ylabel('lnyield')
plt.title('Nonlinear Relationship between pre and lnyield')
plt.legend()
plt.show()
# Filter data for irrigated corn
data = df_corn[(df_corn['group'] == 1)&(df_corn['zone'] == 3)]
# Prepare the interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
# Define the formula for the regression model
formula = 'lnyield ~ lnfer + irrigation1 + irrigation12 + tmp + tmp_tmp_interaction + pre + pre_pre_interaction + C(geoid)'
# Run the regression using the formula interface with clustered standard errors
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the summary table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary = summary.loc[~summary.index.str.contains(pattern)]
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()
# Calculate the total sum of squares (TSS) using the model with only fixed effects
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)
# Calculate the residual sum of squares (RSS) from your main model
rss_within = np.sum((model.resid) ** 2)
# Calculate the within R-squared
within_r2 = 1 - (rss_within / tss_within)
# Format the OLS summary with the within R-squared
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {within_r2:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary = ols_info + "\n" + filtered_summary.to_string()
# Print the final summary
print(final_summary)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/867551411.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/867551411.py:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['irrigation12'] = data['irrigation1'] * data['irrigation1'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/867551411.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/867551411.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/867551411.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/867551411.py:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/867551411.py:12: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.619 Model: OLS Adj. R-squared: 0.553 Within R-squared: 0.262 Method: Least Squares F-statistic: -2559438698182.4 Date: Thu, 29 Aug 2024 Prob (F-statistic): 1.00e+00 Time: 14:49:30 Log-Likelihood: 715.60 No. Observations: 2826 AIC: -603.2 Df Residuals: 2412 BIC: 1858.7 Df Model: 413 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept 6.439422 0.661505 9.734504 2.148642e-22 5.142896 7.735948 lnfer 0.465042 0.083678 5.557544 2.735973e-08 0.301037 0.629048 irrigation1 0.023505 0.018396 1.277732 2.013441e-01 -0.012550 0.059560 irrigation12 -0.000226 0.001662 -0.135874 8.919207e-01 -0.003483 0.003032 tmp -0.007906 0.065092 -0.121466 9.033219e-01 -0.135484 0.119671 tmp_tmp_interaction -0.001382 0.001846 -0.748837 4.539556e-01 -0.005000 0.002235 pre 0.251086 0.028952 8.672475 4.228260e-18 0.194341 0.307831 pre_pre_interaction -0.010942 0.001708 -6.408144 1.473022e-10 -0.014289 -0.007596
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1888: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 413, but rank is 7 warnings.warn('covariance of constraints does not have full '
# Extract the coefficients for tmp and tmp_tmp_interaction
coeff_tmp3 = model.params['tmp']
coeff_tmp_tmp_interaction3 = model.params['tmp_tmp_interaction']
# Define the range of tmp based on the input data
tmp_range3 = np.linspace(np.percentile(data['tmp'],5), np.percentile(data['tmp'],95), 100)
# Calculate predicted lnyield using the quadratic form
lnyield_pred_tmp3 = coeff_tmp3 * tmp_range3 + coeff_tmp_tmp_interaction3 * (tmp_range3 ** 2)+0.5
# Calculate the standard errors and confidence intervals
# Plotting the results
plt.figure(figsize=(10, 6))
plt.plot(tmp_range3, lnyield_pred_tmp3, label='Predicted lnyield', color='blue')
plt.xlabel('tmp')
plt.ylabel('lnyield')
plt.title('Nonlinear Relationship between tmp and lnyield')
plt.legend()
plt.show()
# Extract the coefficients for pre and pre_pre_interaction
coeff_pre3 = model.params['pre']
coeff_pre_pre_interaction3 = model.params['pre_pre_interaction']
# Define the range of pre based on the input data
pre_range3 = np.linspace(np.percentile(data['pre'],5), np.percentile(data['pre'],95), 100)
# Calculate predicted lnyield using the quadratic form
lnyield_pred_pre3 = coeff_pre3 * pre_range3 + coeff_pre_pre_interaction3 * (pre_range3 ** 2)-1.1
# Calculate the standard errors and confidence intervals
# Plotting the results
plt.figure(figsize=(10, 6))
plt.plot(pre_range3, lnyield_pred_pre3, label='Predicted lnyield', color='blue')
plt.xlabel('pre')
plt.ylabel('lnyield')
plt.title('Nonlinear Relationship between pre and lnyield')
plt.legend()
plt.show()
# Filter data for irrigated corn
data = df_corn[(df_corn['group'] == 1)&(df_corn['zone'] == 4)]
# Prepare the interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
# Define the formula for the regression model
formula = 'lnyield ~ lnfer + irrigation1 + irrigation12 + tmp + tmp_tmp_interaction + pre + pre_pre_interaction + C(geoid)'
# Run the regression using the formula interface with clustered standard errors
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the summary table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary = summary.loc[~summary.index.str.contains(pattern)]
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()
# Calculate the total sum of squares (TSS) using the model with only fixed effects
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)
# Calculate the residual sum of squares (RSS) from your main model
rss_within = np.sum((model.resid) ** 2)
# Calculate the within R-squared
within_r2 = 1 - (rss_within / tss_within)
# Format the OLS summary with the within R-squared
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {within_r2:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary = ols_info + "\n" + filtered_summary.to_string()
# Print the final summary
print(final_summary)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/3225580690.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/3225580690.py:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['irrigation12'] = data['irrigation1'] * data['irrigation1'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/3225580690.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/3225580690.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/3225580690.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/3225580690.py:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/3225580690.py:12: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp'] /Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1888: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 380, but rank is 7 warnings.warn('covariance of constraints does not have full '
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.556 Model: OLS Adj. R-squared: 0.489 Within R-squared: 0.233 Method: Least Squares F-statistic: 2085587338502.0 Date: Thu, 29 Aug 2024 Prob (F-statistic): 0.00e+00 Time: 14:49:31 Log-Likelihood: 99.75 No. Observations: 2908 AIC: 562.5 Df Residuals: 2527 BIC: 2839.1 Df Model: 380 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept 3.279064 1.390753 2.357762 1.838550e-02 0.553238 6.004890 lnfer 0.694910 0.105472 6.588591 4.440189e-11 0.488189 0.901630 irrigation1 0.034188 0.012442 2.747852 5.998706e-03 0.009803 0.058573 irrigation12 -0.001114 0.001390 -0.801063 4.230950e-01 -0.003839 0.001611 tmp -0.072687 0.128949 -0.563688 5.729664e-01 -0.325421 0.180048 tmp_tmp_interaction 0.001571 0.003225 0.486982 6.262710e-01 -0.004750 0.007891 pre 0.362097 0.039210 9.234787 2.588012e-20 0.285247 0.438948 pre_pre_interaction -0.016332 0.002041 -8.001175 1.232376e-15 -0.020333 -0.012332
# Extract the coefficients for tmp and tmp_tmp_interaction
coeff_tmp4 = model.params['tmp']
coeff_tmp_tmp_interaction4 = model.params['tmp_tmp_interaction']
# Define the range of tmp based on the input data
tmp_range4 = np.linspace(np.percentile(data['tmp'],5), np.percentile(data['tmp'],95), 100)
# Calculate predicted lnyield using the quadratic form
lnyield_pred_tmp4 = coeff_tmp4 * tmp_range4 + coeff_tmp_tmp_interaction4 * (tmp_range4 ** 2)+0.9
# Calculate the standard errors and confidence intervals
# Plotting the results
plt.figure(figsize=(10, 6))
plt.plot(tmp_range4, lnyield_pred_tmp4, label='Predicted lnyield', color='blue')
plt.xlabel('tmp')
plt.ylabel('lnyield')
plt.title('Nonlinear Relationship between tmp and lnyield')
plt.legend()
plt.show()
# Extract the coefficients for pre and pre_pre_interaction
coeff_pre4 = model.params['pre']
coeff_pre_pre_interaction4 = model.params['pre_pre_interaction']
# Define the range of pre based on the input data
pre_range4 = np.linspace(np.percentile(data['pre'],5), np.percentile(data['pre'],95), 100)
# Calculate predicted lnyield using the quadratic form
lnyield_pred_pre4 = coeff_pre4 * pre_range4 + coeff_pre_pre_interaction4 * (pre_range4 ** 2)-1.7
# Calculate the standard errors and confidence intervals
# Plotting the results
plt.figure(figsize=(10, 6))
plt.plot(pre_range4, lnyield_pred_pre4, label='Predicted lnyield', color='blue')
plt.xlabel('pre')
plt.ylabel('lnyield')
plt.title('Nonlinear Relationship between pre and lnyield')
plt.legend()
plt.show()
# Filter data for irrigated corn
data = df_corn[(df_corn['group'] == 1)&(df_corn['zone'] == 5)]
# Prepare the interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
# Define the formula for the regression model
formula = 'lnyield ~ lnfer + irrigation1 + irrigation12 + tmp + tmp_tmp_interaction + pre + pre_pre_interaction + C(geoid)'
# Run the regression using the formula interface with clustered standard errors
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the summary table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary = summary.loc[~summary.index.str.contains(pattern)]
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()
# Calculate the total sum of squares (TSS) using the model with only fixed effects
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)
# Calculate the residual sum of squares (RSS) from your main model
rss_within = np.sum((model.resid) ** 2)
# Calculate the within R-squared
within_r2 = 1 - (rss_within / tss_within)
# Format the OLS summary with the within R-squared
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {within_r2:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary = ols_info + "\n" + filtered_summary.to_string()
# Print the final summary
print(final_summary)
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.656 Model: OLS Adj. R-squared: 0.591 Within R-squared: 0.092 Method: Least Squares F-statistic: 3743719045357.8 Date: Thu, 29 Aug 2024 Prob (F-statistic): 0.00e+00 Time: 14:49:31 Log-Likelihood: 102.65 No. Observations: 1201 AIC: 174.7 Df Residuals: 1011 BIC: 1142.0 Df Model: 189 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept 13.559728 1.563309 8.673733 4.181786e-18 10.495698 16.623758 lnfer 0.222588 0.064952 3.426950 6.104016e-04 0.095284 0.349892 irrigation1 -0.001439 0.016073 -0.089508 9.286785e-01 -0.032940 0.030063 irrigation12 -0.000545 0.001343 -0.405702 6.849615e-01 -0.003177 0.002087 tmp -0.632408 0.139262 -4.541141 5.595053e-06 -0.905356 -0.359459 tmp_tmp_interaction 0.015438 0.003119 4.949351 7.446145e-07 0.009325 0.021552 pre 0.062186 0.029144 2.133737 3.286435e-02 0.005064 0.119307 pre_pre_interaction -0.001045 0.001525 -0.685619 4.929534e-01 -0.004034 0.001943
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/746417828.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/746417828.py:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['irrigation12'] = data['irrigation1'] * data['irrigation1'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/746417828.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/746417828.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/746417828.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/746417828.py:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/746417828.py:12: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp'] /Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1888: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 189, but rank is 7 warnings.warn('covariance of constraints does not have full '
# Extract the coefficients for tmp and tmp_tmp_interaction
coeff_tmp5 = model.params['tmp']
coeff_tmp_tmp_interaction5 = model.params['tmp_tmp_interaction']
# Define the range of tmp based on the input data
tmp_range5 = np.linspace(np.percentile(data['tmp'],5), np.percentile(data['tmp'],95), 100)
# Calculate predicted lnyield using the quadratic form
lnyield_pred_tmp5 = coeff_tmp5 * tmp_range5 + coeff_tmp_tmp_interaction5 * (tmp_range5 ** 2)+6.4
# Calculate the standard errors and confidence intervals
# Plotting the results
plt.figure(figsize=(10, 6))
plt.plot(tmp_range5, lnyield_pred_tmp5, label='Predicted lnyield', color='blue')
plt.xlabel('tmp')
plt.ylabel('lnyield')
plt.title('Nonlinear Relationship between tmp and lnyield')
plt.legend()
plt.show()
# Extract the coefficients for pre and pre_pre_interaction
coeff_pre5 = model.params['pre']
coeff_pre_pre_interaction5 = model.params['pre_pre_interaction']
# Define the range of pre based on the input data
pre_range5 = np.linspace(np.percentile(data['pre'],5), np.percentile(data['pre'],95), 100)
# Calculate predicted lnyield using the quadratic form
lnyield_pred_pre5 = coeff_pre5 * pre_range5 + coeff_pre_pre_interaction5 * (pre_range5 ** 2)-0.3
# Calculate the standard errors and confidence intervals
# Plotting the results
plt.figure(figsize=(10, 6))
plt.plot(pre_range5, lnyield_pred_pre5, label='Predicted lnyield', color='blue')
plt.xlabel('pre')
plt.ylabel('lnyield')
plt.title('Nonlinear Relationship between pre and lnyield')
plt.legend()
plt.show()
# Import the necessary library for setting fonts
from matplotlib import rcParams
# Set font to Arial and size to 12
rcParams['font.family'] = 'Arial'
rcParams['font.size'] = 16
# Define a list of (tmp_range, lnyield_pred) tuples for the five models
models = [(tmp_range1, lnyield_pred_tmp1),
(tmp_range2, lnyield_pred_tmp2),
(tmp_range3, lnyield_pred_tmp3),
(tmp_range4, lnyield_pred_tmp4),
(tmp_range5, lnyield_pred_tmp5)]
# Plotting the results
plt.figure(figsize=(4, 4))
# Plot the first model (One model)
plt.plot(tmp_range, lnyield_pred_tmp, label='One model', color='blue', linewidth=4)
plt.fill_between(tmp_range, ci_lower_tmp, ci_upper_tmp, color='blue', alpha=0.2, label='95% CI')
# Plot the five models
for idx, (tmp_range_, lnyield_pred_) in enumerate(models):
if idx == 0:
plt.plot(tmp_range_, lnyield_pred_, color='green', label='Five models')
else:
plt.plot(tmp_range_, lnyield_pred_, color='green')
# Add labels and title
plt.xlabel('Temperature (°C)',fontsize=18, fontfamily='Arial')
plt.ylabel('Ln Yield (kg/ha/yr)', fontsize=18, fontfamily='Arial')
x_ticks = np.linspace(tmp_range.min(), tmp_range.max(), num=5) # Custom X tick positions
y_ticks = np.linspace(-2, 2.5, num=5) # Custom Y tick positions
plt.xticks(ticks=x_ticks, labels=[f'{x:.1f}' for x in x_ticks]) # Custom X tick labels
plt.yticks(ticks=y_ticks, labels=[f'{y:.1f}' for y in y_ticks]) # Custom Y tick labels
plt.tick_params(axis='both', which='major', labelsize=16) # Adjust tick label size
# Save the entire figure as a JPG with DPI 300
plt.savefig('maize_irr_tem.jpg', format='jpg', dpi=300, bbox_inches='tight', pad_inches=0.1)
# Show the plot
plt.show()
# Set font to Arial and size to 12
rcParams['font.family'] = 'Arial'
rcParams['font.size'] = 16
# Define a list of (pre_range, lnyield_pred) tuples for the five models
models = [(pre_range1, lnyield_pred_pre1),
(pre_range2, lnyield_pred_pre2),
(pre_range3, lnyield_pred_pre3),
(pre_range4, lnyield_pred_pre4),
(pre_range5, lnyield_pred_pre5)]
# Plotting the results
plt.figure(figsize=(4, 4))
# Plot the first model (One model)
plt.plot(pre_range, lnyield_pred_pre, label='One model', color='blue', linewidth=4)
plt.fill_between(pre_range, ci_lower_pre, ci_upper_pre, color='blue', alpha=0.2, label='95% CI')
# Plot the five models
for idx, (pre_range_, lnyield_pred_) in enumerate(models):
if idx == 0:
plt.plot(pre_range_, lnyield_pred_, color='green', label='Five models')
else:
plt.plot(pre_range_, lnyield_pred_, color='green')
# Add labels and title
plt.xlabel('Precipitation (100mm)',fontsize=18, fontfamily='Arial')
plt.ylabel('Ln Yield (kg/ha/yr)', fontsize=18, fontfamily='Arial')
x_ticks = np.linspace(2, 12, num=5) # Custom X tick positions
y_ticks = np.linspace(-0.4, 0.4, num=4) # Custom Y tick positions
plt.xticks(ticks=x_ticks, labels=[f'{x:.1f}' for x in x_ticks]) # Custom X tick labels
plt.yticks(ticks=y_ticks, labels=[f'{y:.1f}' for y in y_ticks]) # Custom Y tick labels
plt.tick_params(axis='both', which='major', labelsize=16) # Adjust tick label size
plt.legend()
# Save the entire figure as a JPG with DPI 300
plt.savefig('maize_irr_pre.jpg', format='jpg', dpi=300, bbox_inches='tight', pad_inches=0.1)
# Show the plot
plt.show()
# Filter data for rainfed corn
data = df_corn[(df_corn['group'] == 2)&((df_corn['zone'] == 1))]
# Prepare the interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
# Define the formula for the regression model
formula = 'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre + pre_pre_interaction + C(geoid)'
# Run the regression using the formula interface with clustered standard errors
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the summary table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary = summary.loc[~summary.index.str.contains(pattern)]
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()
# Calculate the total sum of squares (TSS) using the model with only fixed effects
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)
# Calculate the residual sum of squares (RSS) from your main model
rss_within = np.sum((model.resid) ** 2)
# Calculate the within R-squared
within_r2 = 1 - (rss_within / tss_within)
# Format the OLS summary with the within R-squared
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {within_r2:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary = ols_info + "\n" + filtered_summary.to_string()
# Print the final summary
print(final_summary)
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.802 Model: OLS Adj. R-squared: 0.771 Within R-squared: 0.135 Method: Least Squares F-statistic: -12799057702605.9 Date: Thu, 29 Aug 2024 Prob (F-statistic): 1.00e+00 Time: 14:49:41 Log-Likelihood: 964.68 No. Observations: 1613 AIC: -1495.4 Df Residuals: 1396 BIC: -326.6 Df Model: 216 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept 5.252103 0.455395 11.533069 8.988165e-31 4.359545 6.144661 lnfer 0.394333 0.048448 8.139324 3.974923e-16 0.299377 0.489289 tmp 0.198333 0.050717 3.910591 9.207056e-05 0.098930 0.297736 tmp_tmp_interaction -0.006274 0.001805 -3.475439 5.100177e-04 -0.009813 -0.002736 pre 0.060279 0.021031 2.866151 4.154958e-03 0.019058 0.101499 pre_pre_interaction -0.002577 0.001448 -1.779549 7.514987e-02 -0.005415 0.000261
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/1034416423.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/1034416423.py:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_interaction'] = data['lnfer'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/1034416423.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/1034416423.py:9: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/1034416423.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/1034416423.py:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp'] /Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1888: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 216, but rank is 5 warnings.warn('covariance of constraints does not have full '
# Extract the coefficients for tmp and tmp_tmp_interaction
coeff_tmp_rfd1 = model.params['tmp']
coeff_tmp_tmp_interaction_rfd1 = model.params['tmp_tmp_interaction']
# Define the range of tmp based on the input data
tmp_range_rfd1 = np.linspace(np.percentile(data['tmp'],5), np.percentile(data['tmp'],95), 100)
# Calculate predicted lnyield using the quadratic form
lnyield_pred_tmp_rfd1 = coeff_tmp_rfd1 * tmp_range_rfd1 + coeff_tmp_tmp_interaction_rfd1 * (tmp_range_rfd1 ** 2)-1.2
# Plotting the results
plt.figure(figsize=(10, 6))
plt.plot(tmp_range_rfd1, lnyield_pred_tmp_rfd1, label='Predicted lnyield', color='blue')
plt.xlabel('tmp')
plt.ylabel('lnyield')
plt.title('Nonlinear Relationship between tmp and lnyield')
plt.legend()
plt.show()
# Extract the coefficients for pre and pre_pre_interaction
coeff_pre_rfd1 = model.params['pre']
coeff_pre_pre_interaction_rfd1 = model.params['pre_pre_interaction']
# Define the range of pre based on the input data
pre_range_rfd1 = np.linspace(np.percentile(data['pre'],5), np.percentile(data['pre'],95), 100)
# Calculate predicted lnyield using the quadratic form
lnyield_pred_pre_rfd1 = coeff_pre_rfd1 * pre_range_rfd1 + coeff_pre_pre_interaction_rfd1 * (pre_range_rfd1 ** 2)-0.25
# Plotting the results
plt.figure(figsize=(10, 6))
plt.plot(pre_range_rfd1, lnyield_pred_pre_rfd1, label='Predicted lnyield', color='blue')
plt.xlabel('pre')
plt.ylabel('lnyield')
plt.title('Nonlinear Relationship between pre and lnyield')
plt.legend()
plt.show()
# Filter data for rainfed corn
data = df_corn[(df_corn['group'] == 2)&((df_corn['zone'] == 2))]
# Prepare the interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
# Define the formula for the regression model
formula = 'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre + pre_pre_interaction + C(geoid)'
# Run the regression using the formula interface with clustered standard errors
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the summary table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary = summary.loc[~summary.index.str.contains(pattern)]
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()
# Calculate the total sum of squares (TSS) using the model with only fixed effects
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)
# Calculate the residual sum of squares (RSS) from your main model
rss_within = np.sum((model.resid) ** 2)
# Calculate the within R-squared
within_r2 = 1 - (rss_within / tss_within)
# Format the OLS summary with the within R-squared
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {within_r2:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary = ols_info + "\n" + filtered_summary.to_string()
# Print the final summary
print(final_summary)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/3422232410.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/3422232410.py:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_interaction'] = data['lnfer'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/3422232410.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/3422232410.py:9: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/3422232410.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/3422232410.py:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp'] /Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1888: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 343, but rank is 5 warnings.warn('covariance of constraints does not have full '
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.642 Model: OLS Adj. R-squared: 0.591 Within R-squared: 0.356 Method: Least Squares F-statistic: -2330358109381.1 Date: Thu, 29 Aug 2024 Prob (F-statistic): 1.00e+00 Time: 14:49:42 Log-Likelihood: 1468.85 No. Observations: 2765 AIC: -2249.7 Df Residuals: 2421 BIC: -211.6 Df Model: 343 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept -1.102642 0.775462 -1.421917 1.550503e-01 -2.622519 0.417235 lnfer 0.927906 0.088061 10.537055 5.829597e-26 0.755309 1.100503 tmp 0.467413 0.061618 7.585637 3.308585e-14 0.346644 0.588183 tmp_tmp_interaction -0.015473 0.001927 -8.027960 9.910679e-16 -0.019251 -0.011696 pre 0.147393 0.021095 6.987085 2.806557e-12 0.106047 0.188738 pre_pre_interaction -0.006740 0.001355 -4.974587 6.538695e-07 -0.009396 -0.004085
# Extract the coefficients for tmp and tmp_tmp_interaction
coeff_tmp_rfd2 = model.params['tmp']
coeff_tmp_tmp_interaction_rfd2 = model.params['tmp_tmp_interaction']
# Define the range of tmp based on the input data
tmp_range_rfd2 = np.linspace(np.percentile(data['tmp'],5), np.percentile(data['tmp'],95), 100)
# Calculate predicted lnyield using the quadratic form
lnyield_pred_tmp_rfd2 = coeff_tmp_rfd2 * tmp_range_rfd2 + coeff_tmp_tmp_interaction_rfd2 * (tmp_range_rfd2 ** 2)-3.2
# Plotting the results
plt.figure(figsize=(10, 6))
plt.plot(tmp_range_rfd2, lnyield_pred_tmp_rfd2, label='Predicted lnyield', color='blue')
plt.xlabel('tmp')
plt.ylabel('lnyield')
plt.title('Nonlinear Relationship between tmp and lnyield')
plt.legend()
plt.show()
# Extract the coefficients for pre and pre_pre_interaction
coeff_pre_rfd2 = model.params['pre']
coeff_pre_pre_interaction_rfd2 = model.params['pre_pre_interaction']
# Define the range of pre based on the input data
pre_range_rfd2 = np.linspace(np.percentile(data['pre'],5), np.percentile(data['pre'],95), 100)
# Calculate predicted lnyield using the quadratic form
lnyield_pred_pre_rfd2 = coeff_pre_rfd2 * pre_range_rfd2 + coeff_pre_pre_interaction_rfd2 * (pre_range_rfd2 ** 2)-0.65
# Plotting the results
plt.figure(figsize=(10, 6))
plt.plot(pre_range_rfd2, lnyield_pred_pre_rfd2, label='Predicted lnyield', color='blue')
plt.xlabel('pre')
plt.ylabel('lnyield')
plt.title('Nonlinear Relationship between pre and lnyield')
plt.legend()
plt.show()
# Filter data for rainfed corn
data = df_corn[(df_corn['group'] == 2)&((df_corn['zone'] == 3))]
# Prepare the interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
# Define the formula for the regression model
formula = 'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre + pre_pre_interaction + C(geoid)'
# Run the regression using the formula interface with clustered standard errors
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the summary table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary = summary.loc[~summary.index.str.contains(pattern)]
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()
# Calculate the total sum of squares (TSS) using the model with only fixed effects
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)
# Calculate the residual sum of squares (RSS) from your main model
rss_within = np.sum((model.resid) ** 2)
# Calculate the within R-squared
within_r2 = 1 - (rss_within / tss_within)
# Format the OLS summary with the within R-squared
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {within_r2:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary = ols_info + "\n" + filtered_summary.to_string()
# Print the final summary
print(final_summary)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/1910022112.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/1910022112.py:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_interaction'] = data['lnfer'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/1910022112.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/1910022112.py:9: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/1910022112.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/1910022112.py:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp'] /Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1888: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 326, but rank is 5 warnings.warn('covariance of constraints does not have full '
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.609 Model: OLS Adj. R-squared: 0.534 Within R-squared: 0.463 Method: Least Squares F-statistic: 83.5 Date: Thu, 29 Aug 2024 Prob (F-statistic): 5.64e-56 Time: 14:49:42 Log-Likelihood: 341.93 No. Observations: 2029 AIC: -29.9 Df Residuals: 1702 BIC: 1806.3 Df Model: 326 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept 6.990404 0.977577 7.150747 8.630714e-13 5.074389 8.906420 lnfer 1.064302 0.135298 7.866380 3.650518e-15 0.799124 1.329480 tmp -0.354535 0.113748 -3.116850 1.827946e-03 -0.577476 -0.131593 tmp_tmp_interaction 0.008139 0.003117 2.611271 9.020632e-03 0.002030 0.014249 pre 0.551791 0.072591 7.601347 2.930642e-14 0.409515 0.694067 pre_pre_interaction -0.027291 0.004045 -6.747497 1.504172e-11 -0.035218 -0.019363
# Extract the coefficients for tmp and tmp_tmp_interaction
coeff_tmp_rfd3 = model.params['tmp']
coeff_tmp_tmp_interaction_rfd3 = model.params['tmp_tmp_interaction']
# Define the range of tmp based on the input data
tmp_range_rfd3 = np.linspace(np.percentile(data['tmp'],5), np.percentile(data['tmp'],95), 100)
# Calculate predicted lnyield using the quadratic form
lnyield_pred_tmp_rfd3 = coeff_tmp_rfd3 * tmp_range_rfd3 + coeff_tmp_tmp_interaction_rfd3 * (tmp_range_rfd3 ** 2)+4
# Plotting the results
plt.figure(figsize=(10, 6))
plt.plot(tmp_range_rfd3, lnyield_pred_tmp_rfd3, label='Predicted lnyield', color='blue')
plt.xlabel('tmp')
plt.ylabel('lnyield')
plt.title('Nonlinear Relationship between tmp and lnyield')
plt.legend()
plt.show()
# Extract the coefficients for pre and pre_pre_interaction
coeff_pre_rfd3 = model.params['pre']
coeff_pre_pre_interaction_rfd3 = model.params['pre_pre_interaction']
# Define the range of pre based on the input data
pre_range_rfd3 = np.linspace(np.percentile(data['pre'],5), np.percentile(data['pre'],95), 100)
# Calculate predicted lnyield using the quadratic form
lnyield_pred_pre_rfd3 = coeff_pre_rfd3 * pre_range_rfd3 + coeff_pre_pre_interaction_rfd3 * (pre_range_rfd3 ** 2)-2.6
# Plotting the results
plt.figure(figsize=(10, 6))
plt.plot(pre_range_rfd3, lnyield_pred_pre_rfd3, label='Predicted lnyield', color='blue')
plt.xlabel('pre')
plt.ylabel('lnyield')
plt.title('Nonlinear Relationship between pre and lnyield')
plt.legend()
plt.show()
# Filter data for rainfed corn
data = df_corn[(df_corn['group'] == 2)&((df_corn['zone'] == 4))]
# Prepare the interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
# Define the formula for the regression model
formula = 'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre + pre_pre_interaction + C(geoid)'
# Run the regression using the formula interface with clustered standard errors
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the summary table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary = summary.loc[~summary.index.str.contains(pattern)]
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()
# Calculate the total sum of squares (TSS) using the model with only fixed effects
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)
# Calculate the residual sum of squares (RSS) from your main model
rss_within = np.sum((model.resid) ** 2)
# Calculate the within R-squared
within_r2 = 1 - (rss_within / tss_within)
# Format the OLS summary with the within R-squared
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {within_r2:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary = ols_info + "\n" + filtered_summary.to_string()
# Print the final summary
print(final_summary)
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.643 Model: OLS Adj. R-squared: 0.549 Within R-squared: 0.124 Method: Least Squares F-statistic: 179386718552.6 Date: Thu, 29 Aug 2024 Prob (F-statistic): 0.00e+00 Time: 14:50:54 Log-Likelihood: 0.71 No. Observations: 1329 AIC: 556.6 Df Residuals: 1050 BIC: 2005.2 Df Model: 278 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept 12.242545 2.358220 5.191435 2.086796e-07 7.620519 16.864571 lnfer 0.371699 0.119700 3.105247 1.901203e-03 0.137091 0.606307 tmp -0.575415 0.215495 -2.670204 7.580525e-03 -0.997777 -0.153053 tmp_tmp_interaction 0.012350 0.005323 2.319926 2.034488e-02 0.001916 0.022784 pre 0.150640 0.036275 4.152740 3.285175e-05 0.079542 0.221737 pre_pre_interaction -0.006717 0.002009 -3.342598 8.299800e-04 -0.010655 -0.002778
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/2475739950.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/2475739950.py:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_interaction'] = data['lnfer'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/2475739950.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/2475739950.py:9: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/2475739950.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/2475739950.py:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp'] /Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1888: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 278, but rank is 5 warnings.warn('covariance of constraints does not have full '
# Extract the coefficients for tmp and tmp_tmp_interaction
coeff_tmp_rfd4 = model.params['tmp']
coeff_tmp_tmp_interaction_rfd4 = model.params['tmp_tmp_interaction']
# Define the range of tmp based on the input data
tmp_range_rfd4 = np.linspace(np.percentile(data['tmp'],5), np.percentile(data['tmp'],95), 100)
# Calculate predicted lnyield using the quadratic form
lnyield_pred_tmp_rfd4 = coeff_tmp_rfd4 * tmp_range_rfd4 + coeff_tmp_tmp_interaction_rfd4 * (tmp_range_rfd4 ** 2)+6.65
# Plotting the results
plt.figure(figsize=(10, 6))
plt.plot(tmp_range_rfd4, lnyield_pred_tmp_rfd4, label='Predicted lnyield', color='blue')
plt.xlabel('tmp')
plt.ylabel('lnyield')
plt.title('Nonlinear Relationship between tmp and lnyield')
plt.legend()
plt.show()
# Extract the coefficients for pre and pre_pre_interaction
coeff_pre_rfd4 = model.params['pre']
coeff_pre_pre_interaction_rfd4 = model.params['pre_pre_interaction']
# Define the range of pre based on the input data
pre_range_rfd4 = np.linspace(np.percentile(data['pre'],5), np.percentile(data['pre'],95), 100)
# Calculate predicted lnyield using the quadratic form
lnyield_pred_pre_rfd4 = coeff_pre_rfd4 * pre_range_rfd4 + coeff_pre_pre_interaction_rfd4 * (pre_range_rfd4 ** 2)-0.72
# Plotting the results
plt.figure(figsize=(10, 6))
plt.plot(pre_range_rfd4, lnyield_pred_pre_rfd4, label='Predicted lnyield', color='blue')
plt.xlabel('pre')
plt.ylabel('lnyield')
plt.title('Nonlinear Relationship between pre and lnyield')
plt.legend()
plt.show()
# Filter data for rainfed corn
data = df_corn[(df_corn['group'] == 2)&((df_corn['zone'] == 5))]
# Prepare the interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
# Define the formula for the regression model
formula = 'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre + pre_pre_interaction + C(geoid)'
# Run the regression using the formula interface with clustered standard errors
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the summary table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary = summary.loc[~summary.index.str.contains(pattern)]
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()
# Calculate the total sum of squares (TSS) using the model with only fixed effects
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)
# Calculate the residual sum of squares (RSS) from your main model
rss_within = np.sum((model.resid) ** 2)
# Calculate the within R-squared
within_r2 = 1 - (rss_within / tss_within)
# Format the OLS summary with the within R-squared
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {within_r2:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary = ols_info + "\n" + filtered_summary.to_string()
# Print the final summary
print(final_summary)
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.715 Model: OLS Adj. R-squared: 0.656 Within R-squared: 0.129 Method: Least Squares F-statistic: -2271384724277.0 Date: Thu, 29 Aug 2024 Prob (F-statistic): 1.00e+00 Time: 14:50:54 Log-Likelihood: -28.81 No. Observations: 1164 AIC: 463.6 Df Residuals: 961 BIC: 1490.7 Df Model: 202 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept -1.589625 4.649129 -0.341919 0.732412 -10.701750 7.522500 lnfer -0.043215 0.135072 -0.319941 0.749013 -0.307951 0.221521 tmp 0.878089 0.396345 2.215468 0.026728 0.101268 1.654910 tmp_tmp_interaction -0.019393 0.008664 -2.238393 0.025195 -0.036374 -0.002412 pre 0.219743 0.045706 4.807772 0.000002 0.130161 0.309325 pre_pre_interaction -0.011562 0.002710 -4.265606 0.000020 -0.016874 -0.006249
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/3732171178.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/3732171178.py:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_interaction'] = data['lnfer'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/3732171178.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/3732171178.py:9: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/3732171178.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17445/3732171178.py:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp'] /Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1888: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 202, but rank is 5 warnings.warn('covariance of constraints does not have full '
# Extract the coefficients for tmp and tmp_tmp_interaction
coeff_tmp_rfd5 = model.params['tmp']
coeff_tmp_tmp_interaction_rfd5 = model.params['tmp_tmp_interaction']
# Define the range of tmp based on the input data
tmp_range_rfd5 = np.linspace(np.percentile(data['tmp'],5), np.percentile(data['tmp'],95), 100)
# Calculate predicted lnyield using the quadratic form
lnyield_pred_tmp_rfd5 = coeff_tmp_rfd5 * tmp_range_rfd5 + coeff_tmp_tmp_interaction_rfd5 * (tmp_range_rfd5 ** 2)-9.85
# Plotting the results
plt.figure(figsize=(10, 6))
plt.plot(tmp_range_rfd5, lnyield_pred_tmp_rfd5, label='Predicted lnyield', color='blue')
plt.xlabel('tmp')
plt.ylabel('lnyield')
plt.title('Nonlinear Relationship between tmp and lnyield')
plt.legend()
plt.show()
# Extract the coefficients for pre and pre_pre_interaction
coeff_pre_rfd5 = model.params['pre']
coeff_pre_pre_interaction_rfd5 = model.params['pre_pre_interaction']
# Define the range of pre based on the input data
pre_range_rfd5 = np.linspace(np.percentile(data['pre'],5), np.percentile(data['pre'],95), 100)
# Calculate predicted lnyield using the quadratic form
lnyield_pred_pre_rfd5 = coeff_pre_rfd5 * pre_range_rfd5 + coeff_pre_pre_interaction_rfd5 * (pre_range_rfd5 ** 2)-0.91
# Plotting the results
plt.figure(figsize=(10, 6))
plt.plot(pre_range_rfd5, lnyield_pred_pre_rfd5, label='Predicted lnyield', color='blue')
plt.xlabel('pre')
plt.ylabel('lnyield')
plt.title('Nonlinear Relationship between pre and lnyield')
plt.legend()
plt.show()
# Set font to Arial and size to 12
rcParams['font.family'] = 'Arial'
rcParams['font.size'] = 16
# Define a list of (tmp_range, lnyield_pred) tuples for the five models
models = [(tmp_range_rfd1, lnyield_pred_tmp_rfd1),
(tmp_range_rfd2, lnyield_pred_tmp_rfd2),
(tmp_range_rfd3, lnyield_pred_tmp_rfd3),
(tmp_range_rfd4, lnyield_pred_tmp_rfd4),
(tmp_range_rfd5, lnyield_pred_tmp_rfd5)]
# Plotting the results
plt.figure(figsize=(4, 4))
# Plot the first model (One model)
plt.plot(tmp_range_rfd, lnyield_pred_tmp_rfd, label='One model', color='blue', linewidth=4)
plt.fill_between(tmp_range_rfd, ci_lower_tmp_rfd, ci_upper_tmp_rfd, color='blue', alpha=0.2, label='95% CI')
# Plot the five models
for idx, (tmp_range_, lnyield_pred_) in enumerate(models):
if idx == 0:
plt.plot(tmp_range_, lnyield_pred_, color='green', label='Five models')
else:
plt.plot(tmp_range_, lnyield_pred_, color='green')
# Add labels and title
plt.xlabel('Temperature (°C)',fontsize=18, fontfamily='Arial')
plt.ylabel('Ln Yield (kg/ha/yr)', fontsize=18, fontfamily='Arial')
x_ticks = np.linspace(tmp_range_rfd.min(), tmp_range_rfd.max(), num=5) # Custom X tick positions
y_ticks = np.linspace(-1, 1, num=5) # Custom Y tick positions
plt.xticks(ticks=x_ticks, labels=[f'{x:.1f}' for x in x_ticks]) # Custom X tick labels
plt.yticks(ticks=y_ticks, labels=[f'{y:.1f}' for y in y_ticks]) # Custom Y tick labels
plt.tick_params(axis='both', which='major', labelsize=16) # Adjust tick label size
# Save the entire figure as a JPG with DPI 300
plt.savefig('maize_rfd_tem.jpg', format='jpg', dpi=300, bbox_inches='tight', pad_inches=0.1)
# Show the plot
plt.show()
# Set font to Arial and size to 12
rcParams['font.family'] = 'Arial'
rcParams['font.size'] = 16
# Define a list of (pre_range, lnyield_pred) tuples for the five models
models = [(pre_range_rfd1, lnyield_pred_pre_rfd1),
(pre_range_rfd2, lnyield_pred_pre_rfd2),
(pre_range_rfd3, lnyield_pred_pre_rfd3),
(pre_range_rfd4, lnyield_pred_pre_rfd4),
(pre_range_rfd5, lnyield_pred_pre_rfd5)]
# Plotting the results
plt.figure(figsize=(4, 4))
# Plot the first model (One model)
plt.plot(pre_range_rfd, lnyield_pred_pre_rfd, label='One model', color='blue', linewidth=4)
plt.fill_between(pre_range_rfd, ci_lower_pre_rfd, ci_upper_pre_rfd, color='blue', alpha=0.2, label='95% CI')
# Plot the five models
for idx, (pre_range_, lnyield_pred_) in enumerate(models):
if idx == 0:
plt.plot(pre_range_, lnyield_pred_, color='green', label='Five models')
else:
plt.plot(pre_range_, lnyield_pred_, color='green')
# Add labels and title
plt.xlabel('Precipitation (100mm)',fontsize=18, fontfamily='Arial')
plt.ylabel('Ln Yield (kg/ha/yr)', fontsize=18, fontfamily='Arial')
x_ticks = np.linspace(2, 12, num=5) # Custom X tick positions
y_ticks = np.linspace(-0.6, 0.4, num=5) # Custom Y tick positions
plt.xticks(ticks=x_ticks, labels=[f'{x:.1f}' for x in x_ticks]) # Custom X tick labels
plt.yticks(ticks=y_ticks, labels=[f'{y:.1f}' for y in y_ticks]) # Custom Y tick labels
plt.tick_params(axis='both', which='major', labelsize=16) # Adjust tick label size
# Save the entire figure as a JPG with DPI 300
plt.savefig('maize_rfd_pre.jpg', format='jpg', dpi=300, bbox_inches='tight', pad_inches=0.1)
# Show the plot
plt.show()
# Filter data for irrigated corn
data = df_corn[(df_corn['group'] == 1)&(df_corn['zone'] == 1)]
# Prepare the interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
# Define the formula for the regression model
formula = 'lnyield ~ lnfer + irrigation1 + irrigation12 + tmp + tmp_tmp_interaction + pre + pre_pre_interaction+ lnfer_irrigation1_tmp_interaction+lnfer_irrigation12_tmp_tmp_interaction+lnfer_irrigation1_pre_interaction +lnfer_irrigation12_pre_pre_interaction + C(geoid)'
# Run the regression using the formula interface with clustered standard errors
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the summary table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary = summary.loc[~summary.index.str.contains(pattern)]
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()
# Calculate the total sum of squares (TSS) using the model with only fixed effects
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)
# Calculate the residual sum of squares (RSS) from your main model
rss_within = np.sum((model.resid) ** 2)
# Calculate the within R-squared
within_r2 = 1 - (rss_within / tss_within)
# Format the OLS summary with the within R-squared
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {within_r2:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary = ols_info + "\n" + filtered_summary.to_string()
# Print the final summary
print(final_summary)
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.769 Model: OLS Adj. R-squared: 0.738 Within R-squared: 0.203 Method: Least Squares F-statistic: -316470679904.5 Date: Wed, 28 Aug 2024 Prob (F-statistic): 1.00e+00 Time: 17:02:15 Log-Likelihood: 989.80 No. Observations: 1452 AIC: -1631.6 Df Residuals: 1278 BIC: -712.8 Df Model: 173 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept 2.649869 0.938825 2.822537 4.764534e-03 0.809805 4.489932 lnfer 0.289802 0.051848 5.589490 2.277374e-08 0.188182 0.391422 irrigation1 0.417184 0.221199 1.886013 5.929321e-02 -0.016358 0.850726 irrigation12 -0.025987 0.011875 -2.188390 2.864119e-02 -0.049261 -0.002713 tmp 0.567397 0.112572 5.040287 4.648339e-07 0.346759 0.788035 tmp_tmp_interaction -0.019233 0.003970 -4.844130 1.271675e-06 -0.027014 -0.011451 pre 0.237504 0.043151 5.503992 3.712862e-08 0.152929 0.322079 pre_pre_interaction -0.014896 0.002958 -5.036466 4.742043e-07 -0.020693 -0.009099 lnfer_irrigation1_tmp_interaction -0.003786 0.002849 -1.329037 1.838357e-01 -0.009370 0.001797 lnfer_irrigation12_tmp_tmp_interaction 0.000027 0.000011 2.586742 9.688822e-03 0.000007 0.000048 lnfer_irrigation1_pre_interaction -0.001136 0.002160 -0.525699 5.990974e-01 -0.005370 0.003099 lnfer_irrigation12_pre_pre_interaction -0.000027 0.000018 -1.453211 1.461653e-01 -0.000063 0.000009
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/728768620.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/728768620.py:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['irrigation12'] = data['irrigation1'] * data['irrigation1'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/728768620.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/728768620.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/728768620.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/728768620.py:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/728768620.py:12: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp'] /Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1888: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 173, but rank is 11 warnings.warn('covariance of constraints does not have full '
# Filter data for irrigated corn
data = df_corn[(df_corn['group'] == 1)&(df_corn['zone'] == 2)]
# Prepare the interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
# Define the formula for the regression model
formula = 'lnyield ~ lnfer + irrigation1 + irrigation12 + tmp + tmp_tmp_interaction + pre + pre_pre_interaction+ lnfer_irrigation1_tmp_interaction+lnfer_irrigation12_tmp_tmp_interaction+lnfer_irrigation1_pre_interaction +lnfer_irrigation12_pre_pre_interaction + C(geoid)'
# Run the regression using the formula interface with clustered standard errors
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the summary table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary = summary.loc[~summary.index.str.contains(pattern)]
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()
# Calculate the total sum of squares (TSS) using the model with only fixed effects
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)
# Calculate the residual sum of squares (RSS) from your main model
rss_within = np.sum((model.resid) ** 2)
# Calculate the within R-squared
within_r2 = 1 - (rss_within / tss_within)
# Format the OLS summary with the within R-squared
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {within_r2:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary = ols_info + "\n" + filtered_summary.to_string()
# Print the final summary
print(final_summary)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/1463388600.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/1463388600.py:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['irrigation12'] = data['irrigation1'] * data['irrigation1'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/1463388600.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/1463388600.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/1463388600.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/1463388600.py:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/1463388600.py:12: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.782 Model: OLS Adj. R-squared: 0.745 Within R-squared: 0.327 Method: Least Squares F-statistic: 120.6 Date: Wed, 28 Aug 2024 Prob (F-statistic): 1.13e-115 Time: 17:02:15 Log-Likelihood: 1754.74 No. Observations: 2657 AIC: -2737.5 Df Residuals: 2271 BIC: -465.9 Df Model: 385 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept 3.504853 0.581110 6.031303 1.626432e-09 2.365897 4.643808 lnfer 0.338743 0.059702 5.673910 1.395748e-08 0.221730 0.455757 irrigation1 0.344109 0.091500 3.760741 1.694111e-04 0.164772 0.523446 irrigation12 -0.015341 0.004296 -3.570881 3.557820e-04 -0.023761 -0.006921 tmp 0.243285 0.059594 4.082347 4.458323e-05 0.126482 0.360088 tmp_tmp_interaction -0.007862 0.001850 -4.248628 2.150837e-05 -0.011488 -0.004235 pre 0.430679 0.031464 13.687868 1.199786e-42 0.369010 0.492348 pre_pre_interaction -0.025172 0.001932 -13.031066 8.146185e-39 -0.028959 -0.021386 lnfer_irrigation1_tmp_interaction -0.001285 0.001061 -1.211259 2.257960e-01 -0.003365 0.000795 lnfer_irrigation12_tmp_tmp_interaction 0.000004 0.000003 1.249374 2.115284e-01 -0.000002 0.000010 lnfer_irrigation1_pre_interaction -0.006504 0.001022 -6.364307 1.961734e-10 -0.008508 -0.004501 lnfer_irrigation12_pre_pre_interaction 0.000050 0.000010 5.190225 2.100405e-07 0.000031 0.000069
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1888: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 385, but rank is 11 warnings.warn('covariance of constraints does not have full '
# Filter data for irrigated corn
data = df_corn[(df_corn['group'] == 1)&(df_corn['zone'] == 3)]
# Prepare the interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
# Define the formula for the regression model
formula = 'lnyield ~ lnfer + irrigation1 + irrigation12 + tmp + tmp_tmp_interaction + pre + pre_pre_interaction+ lnfer_irrigation1_tmp_interaction+lnfer_irrigation12_tmp_tmp_interaction+lnfer_irrigation1_pre_interaction +lnfer_irrigation12_pre_pre_interaction + C(geoid)'
# Run the regression using the formula interface with clustered standard errors
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the summary table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary = summary.loc[~summary.index.str.contains(pattern)]
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()
# Calculate the total sum of squares (TSS) using the model with only fixed effects
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)
# Calculate the residual sum of squares (RSS) from your main model
rss_within = np.sum((model.resid) ** 2)
# Calculate the within R-squared
within_r2 = 1 - (rss_within / tss_within)
# Format the OLS summary with the within R-squared
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {within_r2:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary = ols_info + "\n" + filtered_summary.to_string()
# Print the final summary
print(final_summary)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/2621376652.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/2621376652.py:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['irrigation12'] = data['irrigation1'] * data['irrigation1'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/2621376652.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/2621376652.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/2621376652.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/2621376652.py:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/2621376652.py:12: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.633 Model: OLS Adj. R-squared: 0.569 Within R-squared: 0.289 Method: Least Squares F-statistic: -217776744522.7 Date: Wed, 28 Aug 2024 Prob (F-statistic): 1.00e+00 Time: 17:02:16 Log-Likelihood: 768.82 No. Observations: 2826 AIC: -701.6 Df Residuals: 2408 BIC: 1784.0 Df Model: 417 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept 6.389456 0.666257 9.590075 8.802091e-22 5.083616 7.695296 lnfer 0.437887 0.081787 5.353988 8.603674e-08 0.277587 0.598186 irrigation1 0.120379 0.093586 1.286283 1.983442e-01 -0.063047 0.303805 irrigation12 -0.008318 0.004713 -1.765050 7.755538e-02 -0.017555 0.000919 tmp -0.064156 0.062492 -1.026640 3.045899e-01 -0.186638 0.058325 tmp_tmp_interaction 0.000135 0.001770 0.076487 9.390319e-01 -0.003334 0.003605 pre 0.373887 0.033229 11.251865 2.267459e-29 0.308759 0.439014 pre_pre_interaction -0.017997 0.001895 -9.495813 2.185004e-21 -0.021711 -0.014282 lnfer_irrigation1_tmp_interaction 0.001887 0.000959 1.968281 4.903566e-02 0.000008 0.003767 lnfer_irrigation12_tmp_tmp_interaction -0.000003 0.000003 -1.260237 2.075838e-01 -0.000009 0.000002 lnfer_irrigation1_pre_interaction -0.007009 0.000846 -8.288348 1.148319e-16 -0.008667 -0.005352 lnfer_irrigation12_pre_pre_interaction 0.000036 0.000005 7.591397 3.164756e-14 0.000026 0.000045
/Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1888: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 417, but rank is 11 warnings.warn('covariance of constraints does not have full '
# Filter data for rainfed corn
data = df_corn[(df_corn['group'] == 2)&((df_corn['zone'] == 1))]
# Prepare the interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
# Define the formula for the regression model
formula = 'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +lnfer_tmp_interaction+lnfer_tmp_tmp_interaction+lnfer_pre_interaction+lnfer_pre_pre_interaction+ C(geoid)'
# Run the regression using the formula interface with clustered standard errors
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the summary table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary = summary.loc[~summary.index.str.contains(pattern)]
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()
# Calculate the total sum of squares (TSS) using the model with only fixed effects
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)
# Calculate the residual sum of squares (RSS) from your main model
rss_within = np.sum((model.resid) ** 2)
# Calculate the within R-squared
within_r2 = 1 - (rss_within / tss_within)
# Format the OLS summary with the within R-squared
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {within_r2:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary = ols_info + "\n" + filtered_summary.to_string()
# Print the final summary
print(final_summary)
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.803 Model: OLS Adj. R-squared: 0.772 Within R-squared: 0.142 Method: Least Squares F-statistic: 17185022218331.2 Date: Wed, 28 Aug 2024 Prob (F-statistic): 0.00e+00 Time: 17:02:16 Log-Likelihood: 971.11 No. Observations: 1613 AIC: -1500.2 Df Residuals: 1392 BIC: -309.9 Df Model: 220 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept 5.037728 2.531666 1.989887 0.046603 0.075754 9.999703 lnfer 0.461925 0.491526 0.939779 0.347331 -0.501447 1.425298 tmp 0.108871 0.352919 0.308486 0.757712 -0.582837 0.800579 tmp_tmp_interaction -0.005214 0.012338 -0.422636 0.672561 -0.029396 0.018967 pre 0.389833 0.211415 1.843922 0.065194 -0.024533 0.804200 pre_pre_interaction -0.020473 0.013800 -1.483511 0.137939 -0.047520 0.006575 lnfer_tmp_interaction 0.011324 0.068910 0.164334 0.869468 -0.123737 0.146385 lnfer_tmp_tmp_interaction -0.000005 0.002426 -0.002022 0.998387 -0.004759 0.004749 lnfer_pre_interaction -0.058714 0.037238 -1.576734 0.114857 -0.131700 0.014271 lnfer_pre_pre_interaction 0.003161 0.002448 1.290948 0.196722 -0.001638 0.007959
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/2124702747.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/2124702747.py:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_interaction'] = data['lnfer'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/2124702747.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/2124702747.py:9: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/2124702747.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/2124702747.py:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp'] /Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1888: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 220, but rank is 9 warnings.warn('covariance of constraints does not have full '
# Filter data for rainfed corn
data = df_corn[(df_corn['group'] == 2)&((df_corn['zone'] == 2))]
# Prepare the interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
# Define the formula for the regression model
formula = 'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +lnfer_tmp_interaction+lnfer_tmp_tmp_interaction+lnfer_pre_interaction+lnfer_pre_pre_interaction+ C(geoid)'
# Run the regression using the formula interface with clustered standard errors
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the summary table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary = summary.loc[~summary.index.str.contains(pattern)]
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()
# Calculate the total sum of squares (TSS) using the model with only fixed effects
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)
# Calculate the residual sum of squares (RSS) from your main model
rss_within = np.sum((model.resid) ** 2)
# Calculate the within R-squared
within_r2 = 1 - (rss_within / tss_within)
# Format the OLS summary with the within R-squared
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {within_r2:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary = ols_info + "\n" + filtered_summary.to_string()
# Print the final summary
print(final_summary)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/3708700106.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/3708700106.py:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_interaction'] = data['lnfer'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/3708700106.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/3708700106.py:9: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/3708700106.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/3708700106.py:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp'] /Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1888: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 347, but rank is 9 warnings.warn('covariance of constraints does not have full '
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.645 Model: OLS Adj. R-squared: 0.594 Within R-squared: 0.361 Method: Least Squares F-statistic: 98355558560.4 Date: Wed, 28 Aug 2024 Prob (F-statistic): 0.00e+00 Time: 17:02:16 Log-Likelihood: 1479.48 No. Observations: 2765 AIC: -2263.0 Df Residuals: 2417 BIC: -201.1 Df Model: 347 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept 9.558795 5.008853 1.908380 0.056342 -0.258378 19.375967 lnfer -1.027908 0.913702 -1.124993 0.260592 -2.818730 0.762914 tmp -0.811899 0.583617 -1.391150 0.164180 -1.955768 0.331970 tmp_tmp_interaction 0.024148 0.017884 1.350292 0.176922 -0.010903 0.059199 pre 0.025515 0.186386 0.136893 0.891116 -0.339794 0.390824 pre_pre_interaction -0.002868 0.011725 -0.244611 0.806758 -0.025848 0.020112 lnfer_tmp_interaction 0.233981 0.107230 2.182052 0.029106 0.023814 0.444148 lnfer_tmp_tmp_interaction -0.007265 0.003294 -2.205842 0.027395 -0.013721 -0.000810 lnfer_pre_interaction 0.022029 0.034336 0.641564 0.521156 -0.045269 0.089326 lnfer_pre_pre_interaction -0.000660 0.002160 -0.305279 0.760154 -0.004894 0.003575
# Filter data for rainfed corn
data = df_corn[(df_corn['group'] == 2)&((df_corn['zone'] == 3))]
# Prepare the interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
# Define the formula for the regression model
formula = 'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +lnfer_tmp_interaction+lnfer_tmp_tmp_interaction+lnfer_pre_interaction+lnfer_pre_pre_interaction+ C(geoid)'
# Run the regression using the formula interface with clustered standard errors
model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
# Extract the summary table
summary = model.summary2().tables[1]
# Filter out rows corresponding to dummy variables for geoid
exclude_terms = ['C(geoid)']
pattern = '|'.join([re.escape(term) for term in exclude_terms])
filtered_summary = summary.loc[~summary.index.str.contains(pattern)]
fixed_effects_only_model = sm.OLS.from_formula('lnyield ~ C(geoid)', data=data).fit()
# Calculate the total sum of squares (TSS) using the model with only fixed effects
tss_within = np.sum((fixed_effects_only_model.resid) ** 2)
# Calculate the residual sum of squares (RSS) from your main model
rss_within = np.sum((model.resid) ** 2)
# Calculate the within R-squared
within_r2 = 1 - (rss_within / tss_within)
# Format the OLS summary with the within R-squared
ols_info = f"""
OLS Regression Results
==============================================================================
Dep. Variable: lnyield R-squared: {model.rsquared:.3f}
Model: OLS Adj. R-squared: {model.rsquared_adj:.3f}
Within R-squared: {within_r2:.3f}
Method: Least Squares F-statistic: {model.fvalue:.1f}
Date: {datetime.now():%a, %d %b %Y} Prob (F-statistic): {model.f_pvalue:.2e}
Time: {datetime.now():%H:%M:%S} Log-Likelihood: {model.llf:.2f}
No. Observations: {model.nobs:.0f} AIC: {model.aic:.1f}
Df Residuals: {model.df_resid:.0f} BIC: {model.bic:.1f}
Df Model: {model.df_model:.0f}
Covariance Type: cluster
==============================================================================
"""
# Combine the OLS info with the filtered summary
final_summary = ols_info + "\n" + filtered_summary.to_string()
# Print the final summary
print(final_summary)
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/2848172568.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['pre_pre_interaction'] = data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/2848172568.py:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_interaction'] = data['lnfer'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/2848172568.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/2848172568.py:9: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['tmp_tmp_interaction'] = data['tmp'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/2848172568.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp'] /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_3056/2848172568.py:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp'] /Users/chenchenren/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:1888: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 330, but rank is 9 warnings.warn('covariance of constraints does not have full '
OLS Regression Results ============================================================================== Dep. Variable: lnyield R-squared: 0.610 Model: OLS Adj. R-squared: 0.534 Within R-squared: 0.465 Method: Least Squares F-statistic: 9439797092.9 Date: Wed, 28 Aug 2024 Prob (F-statistic): 0.00e+00 Time: 17:02:17 Log-Likelihood: 346.20 No. Observations: 2029 AIC: -30.4 Df Residuals: 1698 BIC: 1828.3 Df Model: 330 Covariance Type: cluster ============================================================================== Coef. Std.Err. z P>|z| [0.025 0.975] Intercept 19.811681 10.060982 1.969160 0.048935 0.092518 39.530843 lnfer -1.146685 1.778483 -0.644755 0.519086 -4.632448 2.339077 tmp -1.959786 1.092110 -1.794495 0.072734 -4.100282 0.180710 tmp_tmp_interaction 0.051468 0.029999 1.715623 0.086231 -0.007330 0.110266 pre 1.053770 0.537326 1.961136 0.049863 0.000630 2.106911 pre_pre_interaction -0.056528 0.030081 -1.879235 0.060212 -0.115485 0.002428 lnfer_tmp_interaction 0.277258 0.187402 1.479485 0.139011 -0.090043 0.644559 lnfer_tmp_tmp_interaction -0.007477 0.005139 -1.454984 0.145674 -0.017550 0.002595 lnfer_pre_interaction -0.087917 0.089388 -0.983539 0.325342 -0.263115 0.087281 lnfer_pre_pre_interaction 0.005110 0.004994 1.023229 0.306199 -0.004678 0.014898