import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import netCDF4 as nc
import pandas as pd
import os
import csv
from glob import glob
import xarray as xr
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from matplotlib.colors import LinearSegmentedColormap, Normalize
from mpl_toolkits.axes_grid1 import make_axes_locatable
import geopandas as gpd
import matplotlib.colors as mcolors
# Step 1: Data Preparation
# Load your dataset into a pandas DataFrame
os.chdir('/Users/chenchenren/postdoc/paper/2N and water-US/Figure 3 Yiled under warming scenario/')
excel_file = "../regression/soybean_reg_data.csv"
# Read the specific sheet from the Excel file
reg_data= pd.read_csv(excel_file)
print(reg_data)
reg_data['tmp_tmp_interaction'] = reg_data['tmp'] ** 2
reg_data['pre_pre_interaction'] = reg_data['pre'] ** 2
reg_data['irrigation12'] = reg_data['irrigation1'] ** 2
reg_data['lnfer_irrigation1_tmp_interaction'] = reg_data['lnfer'] * reg_data['irrigation1'] * reg_data['tmp']
reg_data['lnfer_irrigation12_tmp_tmp_interaction'] = reg_data['lnfer'] * reg_data['irrigation12'] * reg_data['tmp'] ** 2
reg_data['lnfer_irrigation1_pre_interaction'] = reg_data['lnfer'] * reg_data['irrigation1'] * reg_data['pre']
reg_data['lnfer_irrigation12_pre_pre_interaction'] = reg_data['lnfer'] * reg_data['irrigation12'] * reg_data['pre'] ** 2
reg_data['lnfer_tmp_interaction'] = reg_data['lnfer'] * reg_data['tmp']
reg_data['lnfer_tmp_tmp_interaction'] = reg_data['lnfer'] * reg_data['tmp'] ** 2
reg_data['lnfer_pre_interaction'] = reg_data['lnfer'] * reg_data['pre']
reg_data['lnfer_pre_pre_interaction'] = reg_data['lnfer'] * reg_data['pre'] ** 2
geoid year tmp pre irrigation1 lnyield lnfer \ 0 1001.0 2009.0 24.140660 8.808839 0.718851 7.609614 2.022537 1 1001.0 2010.0 25.675484 5.438990 0.830087 7.035731 1.910005 2 1001.0 2011.0 24.544823 5.270162 0.595086 7.466514 2.025641 3 1001.0 2013.0 24.036592 7.514143 0.001241 7.926855 2.906756 4 1003.0 2008.0 24.727230 8.675528 0.001980 7.934111 1.240602 ... ... ... ... ... ... ... ... 17577 55141.0 2016.0 17.486904 7.304833 0.000000 8.134343 2.860158 17578 55141.0 2017.0 16.590237 6.467205 0.000000 7.978877 2.633171 17579 55141.0 2018.0 17.266659 8.373576 0.000000 7.838077 2.639034 17580 55141.0 2019.0 16.238592 8.735889 0.000000 7.912185 2.671287 17581 55141.0 2020.0 16.202635 6.832954 0.000000 8.173033 2.625827 group zone 0 1 5.0 1 1 5.0 2 1 5.0 3 1 5.0 4 1 5.0 ... ... ... 17577 2 1.0 17578 2 1.0 17579 2 1.0 17580 2 1.0 17581 2 1.0 [17582 rows x 9 columns]
excel_file = "../regression/climate_zone.csv"
# Read the specific sheet from the Excel file
climate_zone= pd.read_csv(excel_file)
# Load data from an Excel file
excel_file = "scenario_management.xlsx"
sheet_name = "soybean"
data = pd.read_excel(excel_file, sheet_name=sheet_name)
data=pd.merge(data, climate_zone, on='geoid', how='left')
data1=data[data['sample'] == 1].copy()
data1=data1.sort_values(by=['geoid', 'scenario'])
print(data1)
data2015=data1[data1['scenario'] == 2015].copy()
print(data2015)
data1['tmp'] = data1.groupby(['geoid','scenario'])['tmp'].transform('mean')
data1['pre'] = data1.groupby(['geoid','scenario'])['pre'].transform('mean')
data1['tmp2015'] = data1.groupby(['geoid','scenario'])['tmp2015'].transform('mean')
data1['pre2015'] = data1.groupby(['geoid','scenario'])['pre2015'].transform('mean')
data1=data1.drop_duplicates(subset=['geoid','scenario'])
data1=data1.drop(['model','lnyield'], axis=1)
print(data1)
geoid year tmp pre har irrigation1 lnyield_obs \ 26081 1001 2015 25.44350 5.702415 779.625 0.377088 7.509678 26084 1001 2015 24.71859 5.866428 779.625 0.377088 7.509678 26086 1001 2015 25.15585 5.960842 779.625 0.377088 7.509678 26087 1001 2015 24.83175 7.541522 779.625 0.377088 7.509678 26089 1001 2015 25.34826 6.200767 779.625 0.377088 7.509678 ... ... ... ... ... ... ... ... 43802 55141 2015 15.16129 7.028600 6700.507 0.000000 7.921128 43804 55141 2015 16.77098 5.360460 6700.507 0.000000 7.921128 43805 55141 2015 16.46397 6.299626 6700.507 0.000000 7.921128 43807 55141 2015 16.07257 6.365017 6700.507 0.000000 7.921128 43809 55141 2015 18.05135 5.488240 6700.507 0.000000 7.921128 lnfer lnyield group sample scenario pre2015 model tmp2015 \ 26081 2.457656 NaN 1 1 1.5 4.174539 gfdl 26.21985 26084 2.457656 NaN 1 1 1.5 6.327019 ipsl 24.13167 26086 2.457656 NaN 1 1 1.5 4.268701 mpi 24.93584 26087 2.457656 NaN 1 1 1.5 8.572182 mri 25.04051 26089 2.457656 NaN 1 1 1.5 9.683448 ukesm1 24.72551 ... ... ... ... ... ... ... ... ... 43802 2.556201 NaN 2 1 2015.0 7.028600 gfdl 15.16129 43804 2.556201 NaN 2 1 2015.0 5.360460 ipsl 16.77098 43805 2.556201 NaN 2 1 2015.0 6.299626 mpi 16.46397 43807 2.556201 NaN 2 1 2015.0 6.365017 mri 16.07257 43809 2.556201 NaN 2 1 2015.0 5.488240 ukesm1 18.05135 zone 26081 5.0 26084 5.0 26086 5.0 26087 5.0 26089 5.0 ... ... 43802 1.0 43804 1.0 43805 1.0 43807 1.0 43809 1.0 [26595 rows x 16 columns] geoid year tmp pre har irrigation1 lnyield_obs \ 17218 1001 2015 24.93584 4.268701 779.625 0.377088 7.509678 26082 1001 2015 26.21985 4.174539 779.625 0.377088 7.509678 26083 1001 2015 24.13167 6.327019 779.625 0.377088 7.509678 26088 1001 2015 25.04051 8.572182 779.625 0.377088 7.509678 26090 1001 2015 24.72551 9.683448 779.625 0.377088 7.509678 ... ... ... ... ... ... ... ... 43802 55141 2015 15.16129 7.028600 6700.507 0.000000 7.921128 43804 55141 2015 16.77098 5.360460 6700.507 0.000000 7.921128 43805 55141 2015 16.46397 6.299626 6700.507 0.000000 7.921128 43807 55141 2015 16.07257 6.365017 6700.507 0.000000 7.921128 43809 55141 2015 18.05135 5.488240 6700.507 0.000000 7.921128 lnfer lnyield group sample scenario pre2015 model tmp2015 \ 17218 2.457656 NaN 1 1 2015.0 4.268701 mpi 24.93584 26082 2.457656 NaN 1 1 2015.0 4.174539 gfdl 26.21985 26083 2.457656 NaN 1 1 2015.0 6.327019 ipsl 24.13167 26088 2.457656 NaN 1 1 2015.0 8.572182 mri 25.04051 26090 2.457656 NaN 1 1 2015.0 9.683448 ukesm1 24.72551 ... ... ... ... ... ... ... ... ... 43802 2.556201 NaN 2 1 2015.0 7.028600 gfdl 15.16129 43804 2.556201 NaN 2 1 2015.0 5.360460 ipsl 16.77098 43805 2.556201 NaN 2 1 2015.0 6.299626 mpi 16.46397 43807 2.556201 NaN 2 1 2015.0 6.365017 mri 16.07257 43809 2.556201 NaN 2 1 2015.0 5.488240 ukesm1 18.05135 zone 17218 5.0 26082 5.0 26083 5.0 26088 5.0 26090 5.0 ... ... 43802 1.0 43804 1.0 43805 1.0 43807 1.0 43809 1.0 [8865 rows x 16 columns] geoid year tmp pre har irrigation1 lnyield_obs \ 26081 1001 2015 25.099590 6.254395 779.625 0.377088 7.509678 17216 1001 2015 26.784940 6.208599 779.625 0.377088 7.509678 17218 1001 2015 25.010676 6.605178 779.625 0.377088 7.509678 17222 1003 2015 26.261318 9.274925 6358.433 0.738441 7.927428 17221 1003 2015 27.808318 8.926058 6358.433 0.738441 7.927428 ... ... ... ... ... ... ... ... 26072 55139 2015 20.012748 5.680057 14541.420 0.000000 8.028848 26071 55139 2015 17.287318 5.693542 14541.420 0.000000 8.028848 26077 55141 2015 17.398562 5.719637 6700.507 0.000000 7.921128 26076 55141 2015 19.304302 6.116161 6700.507 0.000000 7.921128 43802 55141 2015 16.504032 6.108389 6700.507 0.000000 7.921128 lnfer group sample scenario pre2015 tmp2015 zone 26081 2.457656 1 1 1.5 6.605178 25.010676 5.0 17216 2.457656 1 1 3.0 6.605178 25.010676 5.0 17218 2.457656 1 1 2015.0 6.605178 25.010676 5.0 17222 1.292553 1 1 1.5 9.548671 26.127096 5.0 17221 1.292553 1 1 3.0 9.548671 26.127096 5.0 ... ... ... ... ... ... ... ... 26072 1.255133 2 1 3.0 5.693542 17.287318 2.0 26071 1.255133 2 1 2015.0 5.693542 17.287318 2.0 26077 2.556201 2 1 1.5 6.108389 16.504032 1.0 26076 2.556201 2 1 3.0 6.108389 16.504032 1.0 43802 2.556201 2 1 2015.0 6.108389 16.504032 1.0 [5319 rows x 14 columns]
# Load data from an Excel file
excel_file = "scenario_management.xlsx"
sheet_name = "soybean"
data = pd.read_excel(excel_file, sheet_name=sheet_name)
# Assuming climate_zone is already loaded or defined
data = pd.merge(data, climate_zone, on='geoid', how='left')
# Create interaction terms
data['tmp_tmp_interaction'] = data['tmp'] ** 2
data['pre_pre_interaction'] = data['pre'] ** 2
data['irrigation12'] = data['irrigation1'] ** 2
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation12'] * data['tmp'] ** 2
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation12'] * data['pre'] ** 2
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] ** 2
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] ** 2
response_variable = 'lnyield'
# Function to perform regression analysis
def perform_regression(group_data, predictor_variables):
X = sm.add_constant(group_data[predictor_variables])
y = group_data[response_variable]
return sm.OLS(y, X).fit()
# Initialize a nested dictionary to store regression results
reg = {}
data_rows = []
sample_data = reg_data
project_data = data.copy()
#zone_ = [1, 2, 3]
zone_ = [1, 2, 3,4,5]
group_ = [1, 2]
# Check the unique values in the 'zone' and 'group' columns
print(sample_data)
print("Unique zones:", sample_data['zone'].unique())
print("Unique groups:", sample_data['group'].unique())
# Iterate through groups and zones in sample_data
for zone in zone_:
reg[zone] = {}
for group in group_:
print(f"\nSubset for Zone {zone}, Group {group}:")
# Calculate y_temp_base based on regression parameters
tmp_values = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]['tmp'].values
pre_values = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]['pre'].values
lnfer_values = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]['lnfer'].values
irrigation1_values = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]['irrigation1'].values
if group == 1:
group_data1 = sample_data[(sample_data['zone'] == zone) & (sample_data['group'] == group)]
print(f"\nSubset for Zone {zone}, Group {group}:")
print(len(group_data1))
formula = 'lnyield ~ lnfer + irrigation1 + irrigation12 + tmp + tmp_tmp_interaction + pre + pre_pre_interaction+ lnfer_irrigation1_tmp_interaction+lnfer_irrigation12_tmp_tmp_interaction+lnfer_irrigation1_pre_interaction +lnfer_irrigation12_pre_pre_interaction + C(geoid)'
# Run the regression using the formula interface with clustered standard errors
reg_model = sm.OLS.from_formula(formula, data=group_data1).fit(cov_type='cluster', cov_kwds={'groups': group_data1['geoid']})
# model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
print(f"Regression const for Zone {zone}, Group {group}: {reg_model.params['lnfer']}")
y_temp_base = (
reg_model.params['tmp_tmp_interaction'] * tmp_values**2 +
reg_model.params['tmp'] * tmp_values +
reg_model.params['pre_pre_interaction'] * pre_values**2 +
reg_model.params['pre'] * pre_values +
reg_model.params['lnfer'] * lnfer_values +
reg_model.params['irrigation1'] * irrigation1_values +
reg_model.params['irrigation12'] * irrigation1_values**2 +
reg_model.params['lnfer_irrigation1_tmp_interaction'] * irrigation1_values * tmp_values * lnfer_values +
reg_model.params['lnfer_irrigation12_tmp_tmp_interaction'] * irrigation1_values**2 * lnfer_values * tmp_values**2 +
reg_model.params['lnfer_irrigation1_pre_interaction'] * irrigation1_values * pre_values * lnfer_values +
reg_model.params['lnfer_irrigation12_pre_pre_interaction'] * irrigation1_values**2 * lnfer_values * pre_values**2 +
reg_model.params['Intercept']
)
else:
group_data2 = sample_data[(sample_data['zone'] == zone) & (sample_data['group'] == group)]
print(f"\nSubset for Zone {zone}, Group {group}:")
print(len(group_data2))
formula = 'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +lnfer_tmp_interaction+lnfer_tmp_tmp_interaction+lnfer_pre_interaction+lnfer_pre_pre_interaction+ C(geoid)'
# Run the regression using the formula interface with clustered standard errors
reg_model = sm.OLS.from_formula(formula, data=group_data2).fit(cov_type='cluster', cov_kwds={'groups': group_data2['geoid']})
print(f"Regression const for Zone {zone}, Group {group}: {reg_model.params['lnfer']}")
y_temp_base = (
reg_model.params['tmp_tmp_interaction'] * tmp_values**2 +
reg_model.params['tmp'] * tmp_values +
reg_model.params['pre_pre_interaction'] * pre_values**2 +
reg_model.params['pre'] * pre_values +
reg_model.params['lnfer'] * lnfer_values +
reg_model.params['lnfer_tmp_interaction'] * tmp_values * lnfer_values +
reg_model.params['lnfer_tmp_tmp_interaction'] * lnfer_values * tmp_values**2 +
reg_model.params['lnfer_pre_interaction'] * pre_values * lnfer_values +
reg_model.params['lnfer_pre_pre_interaction'] * lnfer_values * pre_values**2 +
reg_model.params['Intercept']
)
# Extract additional information
geoid_values = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]['geoid'].values
scenario_values = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]['scenario'].values
lnyield_obs_values = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]['lnyield_obs'].values
har_values = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]['har'].values
for tmp, pre, lnfer, irrigation1, y_temp_base_val, geoid, scenario, lnyield_obs, har in zip(
tmp_values, pre_values, lnfer_values, irrigation1_values, y_temp_base, geoid_values, scenario_values, lnyield_obs_values, har_values
):
data_rows.append([tmp, pre, lnfer, irrigation1, y_temp_base_val, geoid, scenario, lnyield_obs, har])
# Create a DataFrame from the data
results_base = pd.DataFrame(data_rows, columns=['tmp', 'pre', 'lnfer', 'irrigation1',
'y_temp_base', 'geoid', 'scenario',
'lnyield_obs', 'har'])
results_base_t = results_base[(results_base['scenario'] == 1.5)|(results_base['scenario'] == 3)]
# Rename the 'y_temp_base' column to 'y_temp_base2015'
results_base_t.rename(columns={'y_temp_base': 'y_temp_base_t'}, inplace=True)
results_base_t = results_base_t[['geoid', 'scenario','y_temp_base_t','lnyield_obs','har']]
print(results_base_t)
geoid year tmp pre irrigation1 lnyield lnfer \ 0 1001.0 2009.0 24.140660 8.808839 0.718851 7.609614 2.022537 1 1001.0 2010.0 25.675484 5.438990 0.830087 7.035731 1.910005 2 1001.0 2011.0 24.544823 5.270162 0.595086 7.466514 2.025641 3 1001.0 2013.0 24.036592 7.514143 0.001241 7.926855 2.906756 4 1003.0 2008.0 24.727230 8.675528 0.001980 7.934111 1.240602 ... ... ... ... ... ... ... ... 17577 55141.0 2016.0 17.486904 7.304833 0.000000 8.134343 2.860158 17578 55141.0 2017.0 16.590237 6.467205 0.000000 7.978877 2.633171 17579 55141.0 2018.0 17.266659 8.373576 0.000000 7.838077 2.639034 17580 55141.0 2019.0 16.238592 8.735889 0.000000 7.912185 2.671287 17581 55141.0 2020.0 16.202635 6.832954 0.000000 8.173033 2.625827 group zone tmp_tmp_interaction pre_pre_interaction irrigation12 \ 0 1 5.0 582.771465 77.595645 0.516747 1 1 5.0 659.230479 29.582608 0.689044 2 1 5.0 602.448336 27.774603 0.354128 3 1 5.0 577.757755 56.462338 0.000002 4 1 5.0 611.435903 75.264786 0.000004 ... ... ... ... ... ... 17577 2 1.0 305.791812 53.360585 0.000000 17578 2 1.0 275.235964 41.824741 0.000000 17579 2 1.0 298.137513 70.116775 0.000000 17580 2 1.0 263.691870 76.315757 0.000000 17581 2 1.0 262.525381 46.689260 0.000000 lnfer_irrigation1_tmp_interaction \ 0 35.098181 1 40.707692 2 29.587098 3 0.086677 4 0.060735 ... ... 17577 0.000000 17578 0.000000 17579 0.000000 17580 0.000000 17581 0.000000 lnfer_irrigation12_tmp_tmp_interaction \ 0 609.077595 1 867.597956 2 432.157695 3 0.002585 4 0.002973 ... ... 17577 0.000000 17578 0.000000 17579 0.000000 17580 0.000000 17581 0.000000 lnfer_irrigation1_pre_interaction \ 0 12.807198 1 8.623351 2 6.352818 3 0.027096 4 0.021309 ... ... 17577 0.000000 17578 0.000000 17579 0.000000 17580 0.000000 17581 0.000000 lnfer_irrigation12_pre_pre_interaction lnfer_tmp_interaction \ 0 81.098289 48.825390 1 38.932985 49.040295 2 19.923714 49.719000 3 0.000253 69.868508 4 0.000366 30.676653 ... ... ... 17577 0.000000 50.015312 17578 0.000000 43.684924 17579 0.000000 45.567305 17580 0.000000 43.377936 17581 0.000000 42.545316 lnfer_tmp_tmp_interaction lnfer_pre_interaction \ 0 1178.677142 17.816207 1 1259.133313 10.388496 2 1220.344050 10.675455 3 1679.400821 21.841779 4 758.548666 10.762878 ... ... ... 17577 874.612957 20.892978 17578 724.743248 17.029254 17579 786.795123 22.098154 17580 704.396612 23.336065 17581 689.346233 17.942155 lnfer_pre_pre_interaction 0 156.940101 1 56.502920 2 56.261375 3 164.122238 4 93.373652 ... ... 17577 152.619715 17578 110.131677 17579 185.040574 17580 203.861273 17581 122.597920 [17582 rows x 20 columns] Unique zones: [5. 4. 3. 2. 1.] Unique groups: [1 2] Subset for Zone 1, Group 1: Subset for Zone 1, Group 1: 1184 Regression const for Zone 1, Group 1: 0.06992806536334281 Subset for Zone 1, Group 2: Subset for Zone 1, Group 2: 2394 Regression const for Zone 1, Group 2: 0.7727196949692122 Subset for Zone 2, Group 1: Subset for Zone 2, Group 1: 2302 Regression const for Zone 2, Group 1: 0.09666603063563231 Subset for Zone 2, Group 2: Subset for Zone 2, Group 2: 3907 Regression const for Zone 2, Group 2: -1.8963576593979126 Subset for Zone 3, Group 1: Subset for Zone 3, Group 1: 920 Regression const for Zone 3, Group 1: 0.21212278406099863 Subset for Zone 3, Group 2: Subset for Zone 3, Group 2: 1432 Regression const for Zone 3, Group 2: -0.7293762997383002 Subset for Zone 4, Group 1: Subset for Zone 4, Group 1: 1725 Regression const for Zone 4, Group 1: 0.15772649659476343 Subset for Zone 4, Group 2: Subset for Zone 4, Group 2: 1178 Regression const for Zone 4, Group 2: -1.7931729857299414 Subset for Zone 5, Group 1: Subset for Zone 5, Group 1: 1376 Regression const for Zone 5, Group 1: 0.17082038322671933 Subset for Zone 5, Group 2: Subset for Zone 5, Group 2: 1164 Regression const for Zone 5, Group 2: 3.3526704864792407 geoid scenario y_temp_base_t lnyield_obs har 1165 17015 1.5 8.385420 8.326886 15982.200 1166 17015 1.5 8.362552 8.326886 15982.200 1167 17015 3.0 8.427684 8.326886 15982.200 1168 17015 3.0 8.405397 8.326886 15982.200 1170 17085 1.5 8.476838 8.158237 15509.470 ... ... ... ... ... ... 43805 48481 1.5 7.977667 7.793029 4401.709 43807 48481 3.0 7.867922 7.793029 4401.709 43808 48481 1.5 7.971409 7.793029 4401.709 43809 48481 1.5 8.045164 7.793029 4401.709 43810 48481 3.0 7.938400 7.793029 4401.709 [17730 rows x 5 columns]
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_2033/2364685230.py:123: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy results_base_t.rename(columns={'y_temp_base': 'y_temp_base_t'}, inplace=True)
# Load data from an Excel file
excel_file = "scenario_management.xlsx"
sheet_name = "soybean"
data = pd.read_excel(excel_file, sheet_name=sheet_name)
# Assuming climate_zone is already loaded or defined
data = pd.merge(data, climate_zone, on='geoid', how='left')
# Create interaction terms
data['tmp_tmp_interaction'] = data['tmp'] ** 2
data['pre_pre_interaction'] = data['pre'] ** 2
data['irrigation12'] = data['irrigation1'] ** 2
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation12'] * data['tmp'] ** 2
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation12'] * data['pre'] ** 2
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] ** 2
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] ** 2
response_variable = 'lnyield'
# Function to perform regression analysis
def perform_regression(group_data, predictor_variables):
X = sm.add_constant(group_data[predictor_variables])
y = group_data[response_variable]
return sm.OLS(y, X).fit()
# Initialize a nested dictionary to store regression results
reg = {}
data_rows = []
sample_data = reg_data
project_data = data1
#zone_ = [1, 2, 3]
zone_ = [1, 2, 3,4,5]
group_ = [1, 2]
# Check the unique values in the 'zone' and 'group' columns
print("Unique zones:", sample_data['zone'].unique())
print("Unique groups:", sample_data['group'].unique())
# Iterate through groups and zones in sample_data
for zone in zone_:
reg[zone] = {}
for group in group_:
print(f"\nSubset for Zone {zone}, Group {group}:")
tmp_values = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]['tmp'].values
pre_values = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]['pre'].values
lnfer_values = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]['lnfer'].values
irrigation1_values = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]['irrigation1'].values
if group == 1:
group_data1 = sample_data[(sample_data['zone'] == zone) & (sample_data['group'] == group)]
print(f"\nSubset for Zone {zone}, Group {group}:")
print(len(group_data1))
formula = 'lnyield ~ lnfer + irrigation1 + irrigation12 + tmp + tmp_tmp_interaction + pre + pre_pre_interaction+ lnfer_irrigation1_tmp_interaction+lnfer_irrigation12_tmp_tmp_interaction+lnfer_irrigation1_pre_interaction +lnfer_irrigation12_pre_pre_interaction + C(geoid)'
# Run the regression using the formula interface with clustered standard errors
reg_model = sm.OLS.from_formula(formula, data=group_data1).fit(cov_type='cluster', cov_kwds={'groups': group_data1['geoid']})
# model = sm.OLS.from_formula(formula, data=data).fit(cov_type='cluster', cov_kwds={'groups': data['geoid']})
print(f"Regression const for Zone {zone}, Group {group}: {reg_model.params['lnfer']}")
y_temp_base = (
reg_model.params['tmp_tmp_interaction'] * tmp_values**2 +
reg_model.params['tmp'] * tmp_values +
reg_model.params['pre_pre_interaction'] * pre_values**2 +
reg_model.params['pre'] * pre_values +
reg_model.params['lnfer'] * lnfer_values +
reg_model.params['irrigation1'] * irrigation1_values +
reg_model.params['irrigation12'] * irrigation1_values**2 +
reg_model.params['lnfer_irrigation1_tmp_interaction'] * irrigation1_values * tmp_values * lnfer_values +
reg_model.params['lnfer_irrigation12_tmp_tmp_interaction'] * irrigation1_values**2 * lnfer_values * tmp_values**2 +
reg_model.params['lnfer_irrigation1_pre_interaction'] * irrigation1_values * pre_values * lnfer_values +
reg_model.params['lnfer_irrigation12_pre_pre_interaction'] * irrigation1_values**2 * lnfer_values * pre_values**2 +
reg_model.params['Intercept']
)
else:
group_data2 = sample_data[(sample_data['zone'] == zone) & (sample_data['group'] == group)]
print(f"\nSubset for Zone {zone}, Group {group}:")
print(len(group_data2))
formula = 'lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre + pre_pre_interaction +lnfer_tmp_interaction+lnfer_tmp_tmp_interaction+lnfer_pre_interaction+lnfer_pre_pre_interaction+ C(geoid)'
# Run the regression using the formula interface with clustered standard errors
reg_model = sm.OLS.from_formula(formula, data=group_data2).fit(cov_type='cluster', cov_kwds={'groups': group_data2['geoid']})
print(f"Regression const for Zone {zone}, Group {group}: {reg_model.params['lnfer']}")
y_temp_base = (
reg_model.params['tmp_tmp_interaction'] * tmp_values**2 +
reg_model.params['tmp'] * tmp_values +
reg_model.params['pre_pre_interaction'] * pre_values**2 +
reg_model.params['pre'] * pre_values +
reg_model.params['lnfer'] * lnfer_values +
reg_model.params['lnfer_tmp_interaction'] * tmp_values * lnfer_values +
reg_model.params['lnfer_tmp_tmp_interaction'] * lnfer_values * tmp_values**2 +
reg_model.params['lnfer_pre_interaction'] * pre_values * lnfer_values +
reg_model.params['lnfer_pre_pre_interaction'] * lnfer_values * pre_values**2 +
reg_model.params['Intercept']
)
geoid_values = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]['geoid'].values
scenario_values = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]['scenario'].values
for tmp, pre, lnfer, irrigation1,y_temp_base_val, geoid, scenario in zip(
tmp_values, pre_values, lnfer_values, irrigation1_values,
y_temp_base, geoid_values, scenario_values
):
data_rows.append([tmp, pre, lnfer, irrigation1, y_temp_base_val, geoid, scenario])
# Create a DataFrame from the data
results_base = pd.DataFrame(data_rows, columns=['tmp','pre','lnfer','irrigation1',
'y_temp_base', 'geoid', 'scenario'])
results_base = results_base[results_base['scenario'] == 2015]
# Rename the 'y_temp_base' column to 'y_temp_base2015'
results_base.rename(columns={'y_temp_base': 'y_temp_base2015'}, inplace=True)
# Keep only the desired columns in results_2015
results_base2015 = results_base[['geoid', 'y_temp_base2015']]
print(results_base2015)
Unique zones: [5. 4. 3. 2. 1.] Unique groups: [1 2] Subset for Zone 1, Group 1: Subset for Zone 1, Group 1: 1184 Regression const for Zone 1, Group 1: 0.06992806536334281 Subset for Zone 1, Group 2: Subset for Zone 1, Group 2: 2394 Regression const for Zone 1, Group 2: 0.7727196949692122 Subset for Zone 2, Group 1: Subset for Zone 2, Group 1: 2302 Regression const for Zone 2, Group 1: 0.09666603063563231 Subset for Zone 2, Group 2: Subset for Zone 2, Group 2: 3907 Regression const for Zone 2, Group 2: -1.8963576593979126 Subset for Zone 3, Group 1: Subset for Zone 3, Group 1: 920 Regression const for Zone 3, Group 1: 0.21212278406099863 Subset for Zone 3, Group 2: Subset for Zone 3, Group 2: 1432 Regression const for Zone 3, Group 2: -0.7293762997383002 Subset for Zone 4, Group 1: Subset for Zone 4, Group 1: 1725 Regression const for Zone 4, Group 1: 0.15772649659476343 Subset for Zone 4, Group 2: Subset for Zone 4, Group 2: 1178 Regression const for Zone 4, Group 2: -1.7931729857299414 Subset for Zone 5, Group 1: Subset for Zone 5, Group 1: 1376 Regression const for Zone 5, Group 1: 0.17082038322671933 Subset for Zone 5, Group 2: Subset for Zone 5, Group 2: 1164 Regression const for Zone 5, Group 2: 3.3526704864792407 geoid y_temp_base2015 2 17015 8.366337 5 17085 8.455359 8 19011 8.428402 11 19013 8.497588 14 19015 8.422003 ... ... ... 5306 48321 8.068319 5309 48331 7.934468 5312 48387 8.276048 5315 48469 7.808738 5318 48481 8.045157 [1773 rows x 2 columns]
data = results_base_t.merge(results_base, left_on=['geoid'], right_on=['geoid'], how='left')
data = data.drop(columns=['scenario_y'])
data.rename(columns={'scenario_x': 'scenario'}, inplace=True)
data = data.sort_values(by=['geoid', 'scenario'])
print(data)
data['yield_warming']=np.exp(data['y_temp_base_t']-data['y_temp_base2015']+data['lnyield_obs'])
data['gap_yield_warming']=(data['yield_warming']-np.exp(data['lnyield_obs']))/1000 #tonne/ha
data['gap_production_warming']=data['gap_yield_warming']*data['har']/10000 #10000 tonne
data['yield_obs']=(np.exp(data['lnyield_obs']))/1000 #tonne/ha
data['production_obs']=data['yield_obs']*data['har']/10000 #10000 tonne
print(data)
alldata=data
# Extract the corresponding lnfer_value and irrigation1_value
output_df = data[['geoid', 'scenario','gap_yield_warming','gap_production_warming']]
# Rename the columns to match your desired output
output_df.columns = ['geoid', 'scenario','gap_yield_warming','gap_production_warming']
# Define the output CSV file pat
output_csv_file = "soybean_tem_model3.csv"
# Save the 'output_df' DataFrame to a CSV file
output_df.to_csv(output_csv_file, index=False)
print(f"Data saved to '{output_csv_file}'")
geoid scenario y_temp_base_t lnyield_obs har tmp \ 15262 1001 1.5 7.430152 7.509678 779.625 25.010676 15263 1001 1.5 7.509750 7.509678 779.625 25.010676 15265 1001 1.5 7.477690 7.509678 779.625 25.010676 15266 1001 1.5 7.589406 7.509678 779.625 25.010676 15267 1001 1.5 7.478417 7.509678 779.625 25.010676 ... ... ... ... ... ... ... 2443 55141 3.0 8.623206 7.921128 6700.507 16.504032 2445 55141 3.0 8.605665 7.921128 6700.507 16.504032 2447 55141 3.0 8.687835 7.921128 6700.507 16.504032 3386 55141 3.0 8.575415 7.921128 6700.507 16.504032 3388 55141 3.0 8.550782 7.921128 6700.507 16.504032 pre lnfer irrigation1 y_temp_base2015 15262 6.605178 2.457656 0.377088 7.534804 15263 6.605178 2.457656 0.377088 7.534804 15265 6.605178 2.457656 0.377088 7.534804 15266 6.605178 2.457656 0.377088 7.534804 15267 6.605178 2.457656 0.377088 7.534804 ... ... ... ... ... 2443 6.108389 2.556201 0.000000 8.324783 2445 6.108389 2.556201 0.000000 8.324783 2447 6.108389 2.556201 0.000000 8.324783 3386 6.108389 2.556201 0.000000 8.324783 3388 6.108389 2.556201 0.000000 8.324783 [17730 rows x 10 columns] geoid scenario y_temp_base_t lnyield_obs har tmp \ 15262 1001 1.5 7.430152 7.509678 779.625 25.010676 15263 1001 1.5 7.509750 7.509678 779.625 25.010676 15265 1001 1.5 7.477690 7.509678 779.625 25.010676 15266 1001 1.5 7.589406 7.509678 779.625 25.010676 15267 1001 1.5 7.478417 7.509678 779.625 25.010676 ... ... ... ... ... ... ... 2443 55141 3.0 8.623206 7.921128 6700.507 16.504032 2445 55141 3.0 8.605665 7.921128 6700.507 16.504032 2447 55141 3.0 8.687835 7.921128 6700.507 16.504032 3386 55141 3.0 8.575415 7.921128 6700.507 16.504032 3388 55141 3.0 8.550782 7.921128 6700.507 16.504032 pre lnfer irrigation1 y_temp_base2015 yield_warming \ 15262 6.605178 2.457656 0.377088 7.534804 1644.227654 15263 6.605178 2.457656 0.377088 7.534804 1780.456042 15265 6.605178 2.457656 0.377088 7.534804 1724.279761 15266 6.605178 2.457656 0.377088 7.534804 1928.081260 15267 6.605178 2.457656 0.377088 7.534804 1725.533160 ... ... ... ... ... ... 2443 6.108389 2.556201 0.000000 8.324783 3712.833376 2445 6.108389 2.556201 0.000000 8.324783 3648.275536 2447 6.108389 2.556201 0.000000 8.324783 3960.714792 3386 6.108389 2.556201 0.000000 8.324783 3539.567995 3388 6.108389 2.556201 0.000000 8.324783 3453.442559 gap_yield_warming gap_production_warming yield_obs production_obs 15262 -0.181398 -0.014142 1.825626 0.142330 15263 -0.045170 -0.003522 1.825626 0.142330 15265 -0.101346 -0.007901 1.825626 0.142330 15266 0.102456 0.007988 1.825626 0.142330 15267 -0.100092 -0.007803 1.825626 0.142330 ... ... ... ... ... 2443 0.957957 0.641879 2.754877 1.845907 2445 0.893399 0.598622 2.754877 1.845907 2447 1.205838 0.807973 2.754877 1.845907 3386 0.784691 0.525783 2.754877 1.845907 3388 0.698566 0.468074 2.754877 1.845907 [17730 rows x 15 columns] Data saved to 'soybean_tem_model3.csv'
data = alldata[alldata['scenario'] == 3]
data1= data[data['gap_production_warming'] >0]
print(data)
print(data1)
print(np.min(data['gap_production_warming']),np.max(data['gap_production_warming']),np.mean(data['gap_production_warming']))
print(np.percentile(data['gap_production_warming'], 5),np.percentile(data['gap_production_warming'], 95))
print("sum of gap production", np.sum(data['gap_production_warming']))
print("sum of production total", np.sum(data['production_obs']))
print("sum of production total in warming", np.sum(data1['production_obs']))
# Replace values in 'gap_yield' greater than p90 with p90
data.loc[data['gap_production_warming'] > 2.1, 'gap_production_warming'] = 2.1
data.loc[data['gap_production_warming'] < -2.1, 'gap_production_warming'] = -2.1
data['geoid'] = data['geoid'].astype(float)
# Load US county boundaries shapefile using geopandas
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_county_20m/surface_shapefile.shp'
gdf_county = gpd.read_file(shapefile_path)
# Ensure 'GEOID' column in the shapefile has the 'object' data type
gdf_county['GEOID'] = gdf_county['GEOID'].astype(float)
# Rename the 'geoid' column to 'GEOID' in year_2020_data
data.rename(columns={'geoid': 'GEOID'}, inplace=True)
# Convert the 'GEOID' column in the 'data' DataFrame to 'object' data type
data['GEOID'] = data['GEOID'].astype(float)
# Now, merge 'data' with county boundaries based on 'GEOID' attribute
merged_data = gdf_county.merge(data, on='GEOID', how='left')
# Filter the data to include only geometries within the specified longitude and latitude range
filtered_data = merged_data.cx[-130:-60, 25:50]
cmap = mcolors.ListedColormap(['white'] + list(plt.cm.Spectral(np.linspace(0, 1, 255))))
norm = mcolors.Normalize(vmin=-2.2, vmax=2.2)
# Plot both county boundaries and 'y_temp' values on the same map within the specified range
fig, ax = plt.subplots(figsize=(10, 4))
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_state_20m/cb_2018_us_state_20m.shp'
gdf_state = gpd.read_file(shapefile_path)
gdf_state.rename(columns={'GEOID': 'GEOID_state'}, inplace=True)
gdf_state = gdf_state.cx[-130:-60, 25:50]
gdf_state.boundary.plot(ax=ax, linewidth=0.3, color='grey') # Plot county boundaries in grey
sc = plt.scatter([], [], c=[], cmap=cmap, norm=norm) # Create an empty ScalarMappable for the colorbar
# Define custom colorbar ticks
custom_ticks = [-2, -1, 0, 1, 2]
# Adjust the height of the colorbar by setting the shrink parameter
cbar = plt.colorbar(sc, ax=ax, label='Production change\n($10^{4}$ t)', orientation='vertical', pad=0.02, shrink=0.8, ticks=custom_ticks)
# Set the font size of the colorbar label
# Set the colorbar label text, font size, and padding
cbar.ax.set_ylabel('Production change\n($10^{4}$ tonnes/yr)', fontsize=16, fontfamily='Arial')
cbar.ax.tick_params(labelsize=16) # Adjust tick label fontsize
cbar.ax.set_yticklabels(custom_ticks, fontsize=16, fontfamily='Arial')
filtered_data.plot(column='gap_production_warming', cmap=cmap, ax=ax, norm=norm)
# Add labels, titles, or other plot configurations as needed
ax.set_xticks([]) # Remove x-axis ticks
ax.set_yticks([]) # Remove y-axis ticks
ax.set_frame_on(False) # Turn off the frame
plt.text(0.25, 0.2, '(f) Soybean', transform=plt.gcf().transFigure,
fontsize=18, fontweight='bold', fontfamily='Arial', ha='left', va='bottom')
# Save the entire figure as a JPG with DPI 300
plt.savefig('soybean_all_production_3.jpg', format='jpg', dpi=300, bbox_inches='tight', pad_inches=0.1)
# Show the plot
plt.show()
geoid scenario y_temp_base_t lnyield_obs har tmp \ 14590 1001 3.0 7.271526 7.509678 779.625 25.010676 14591 1001 3.0 7.431366 7.509678 779.625 25.010676 14592 1001 3.0 7.450286 7.509678 779.625 25.010676 14593 1001 3.0 7.261009 7.509678 779.625 25.010676 15264 1001 3.0 7.314470 7.509678 779.625 25.010676 ... ... ... ... ... ... ... 2443 55141 3.0 8.623206 7.921128 6700.507 16.504032 2445 55141 3.0 8.605665 7.921128 6700.507 16.504032 2447 55141 3.0 8.687835 7.921128 6700.507 16.504032 3386 55141 3.0 8.575415 7.921128 6700.507 16.504032 3388 55141 3.0 8.550782 7.921128 6700.507 16.504032 pre lnfer irrigation1 y_temp_base2015 yield_warming \ 14590 6.605178 2.457656 0.377088 7.534804 1403.045960 14591 6.605178 2.457656 0.377088 7.534804 1646.225119 14592 6.605178 2.457656 0.377088 7.534804 1677.669424 14593 6.605178 2.457656 0.377088 7.534804 1388.367184 15264 6.605178 2.457656 0.377088 7.534804 1464.610233 ... ... ... ... ... ... 2443 6.108389 2.556201 0.000000 8.324783 3712.833376 2445 6.108389 2.556201 0.000000 8.324783 3648.275536 2447 6.108389 2.556201 0.000000 8.324783 3960.714792 3386 6.108389 2.556201 0.000000 8.324783 3539.567995 3388 6.108389 2.556201 0.000000 8.324783 3453.442559 gap_yield_warming gap_production_warming yield_obs production_obs 14590 -0.422580 -0.032945 1.825626 0.142330 14591 -0.179400 -0.013987 1.825626 0.142330 14592 -0.147956 -0.011535 1.825626 0.142330 14593 -0.437258 -0.034090 1.825626 0.142330 15264 -0.361015 -0.028146 1.825626 0.142330 ... ... ... ... ... 2443 0.957957 0.641879 2.754877 1.845907 2445 0.893399 0.598622 2.754877 1.845907 2447 1.205838 0.807973 2.754877 1.845907 3386 0.784691 0.525783 2.754877 1.845907 3388 0.698566 0.468074 2.754877 1.845907 [8865 rows x 15 columns] geoid scenario y_temp_base_t lnyield_obs har tmp \ 7800 17005 3.0 8.330424 8.059599 32668.200 21.931076 8246 17005 3.0 8.346432 8.059599 32668.200 21.931076 8248 17005 3.0 8.360294 8.059599 32668.200 21.931076 8249 17005 3.0 8.328988 8.059599 32668.200 21.931076 3393 17007 3.0 8.193610 8.171467 16426.800 18.375522 ... ... ... ... ... ... ... 2443 55141 3.0 8.623206 7.921128 6700.507 16.504032 2445 55141 3.0 8.605665 7.921128 6700.507 16.504032 2447 55141 3.0 8.687835 7.921128 6700.507 16.504032 3386 55141 3.0 8.575415 7.921128 6700.507 16.504032 3388 55141 3.0 8.550782 7.921128 6700.507 16.504032 pre lnfer irrigation1 y_temp_base2015 yield_warming \ 7800 5.610648 1.491895 0.000000 8.292404 3286.635407 8246 5.610648 1.491895 0.000000 8.292404 3339.670577 8248 5.610648 1.491895 0.000000 8.292404 3386.288227 8249 5.610648 1.491895 0.000000 8.292404 3281.917630 3393 5.985170 0.136541 0.007737 8.049519 4086.963393 ... ... ... ... ... ... 2443 6.108389 2.556201 0.000000 8.324783 3712.833376 2445 6.108389 2.556201 0.000000 8.324783 3648.275536 2447 6.108389 2.556201 0.000000 8.324783 3960.714792 3386 6.108389 2.556201 0.000000 8.324783 3539.567995 3388 6.108389 2.556201 0.000000 8.324783 3453.442559 gap_yield_warming gap_production_warming yield_obs production_obs 7800 0.122614 0.400559 3.164021 10.336287 8246 0.175649 0.573815 3.164021 10.336287 8248 0.222267 0.726107 3.164021 10.336287 8249 0.117897 0.385147 3.164021 10.336287 3393 0.548432 0.900899 3.538531 5.812674 ... ... ... ... ... 2443 0.957957 0.641879 2.754877 1.845907 2445 0.893399 0.598622 2.754877 1.845907 2447 1.205838 0.807973 2.754877 1.845907 3386 0.784691 0.525783 2.754877 1.845907 3388 0.698566 0.468074 2.754877 1.845907 [4044 rows x 15 columns] -13.380669438706175 14.569760438929762 0.0690849512380357 -1.4140473786194523 1.708186512332604 sum of gap production 612.4380927251864 sum of production total 49397.15004655279 sum of production total in warming 29396.147918762574
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_2033/3338880286.py:15: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['geoid'] = data['geoid'].astype(float) /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_2033/3338880286.py:26: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data.rename(columns={'geoid': 'GEOID'}, inplace=True) /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_2033/3338880286.py:29: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['GEOID'] = data['GEOID'].astype(float)