import os
import csv
from glob import glob

import numpy as np
import pandas as pd
import statsmodels.api as sm
import netCDF4 as nc
import xarray as xr
import geopandas as gpd

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.ticker as ticker
from matplotlib.gridspec import GridSpec
from matplotlib.colors import LinearSegmentedColormap, Normalize
from mpl_toolkits.axes_grid1 import make_axes_locatable
# Step 1: Data Preparation
# Load your dataset into a pandas DataFrame
os.chdir('/Users/chenchenren/postdoc/paper/2N and water-US/Figure 3 Yiled under warming scenario/')
excel_file = "../regression/corn_reg_data.csv"
# Read the specific sheet from the Excel file
reg= pd.read_csv(excel_file)
excel_file = "../regression/climate_zone.csv"
# Read the specific sheet from the Excel file
climate_zone= pd.read_csv(excel_file)
# Load data from an Excel file
excel_file = "scenario_management.xlsx"
sheet_name = "maize"
data = pd.read_excel(excel_file, sheet_name=sheet_name)
data = pd.merge(data, climate_zone, on='geoid', how='left')
data1 = data[data['sample'] == 1].copy()
data1 = data1.sort_values(by=['geoid', 'scenario'])
print(data1)
data2015 = data1[data1['scenario'] == 2015].copy()
print(data2015)
# Average the climate-model ensemble within each county-scenario pair
data1['tmp'] = data1.groupby(['geoid', 'scenario'])['tmp'].transform('mean')
data1['pre'] = data1.groupby(['geoid', 'scenario'])['pre'].transform('mean')
data1['tmp2015'] = data1.groupby(['geoid', 'scenario'])['tmp2015'].transform('mean')
data1['pre2015'] = data1.groupby(['geoid', 'scenario'])['pre2015'].transform('mean')
# Keep one row per county-scenario pair and drop model-specific columns
data1 = data1.drop_duplicates(subset=['geoid', 'scenario'])
data1 = data1.drop(['model', 'lnyield'], axis=1)
print(data1)
[output] data1 (sample == 1, one row per climate model): 31425 rows x 16 columns; data2015 (scenario == 2015): 10475 rows x 16 columns; after averaging across models and deduplicating, data1: 6285 rows x 14 columns (one row per geoid-scenario pair, 'model' and 'lnyield' dropped).
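The per-column transform('mean') plus drop_duplicates pattern used above can also be written as a single groupby aggregation; a minimal sketch (data1_alt is an illustrative name; it assumes the column set shown in the output and keeps the first value of the columns that are constant within each county-scenario pair):

data1_alt = (
    data[data['sample'] == 1]
    .groupby(['geoid', 'scenario'], as_index=False)
    .agg({'tmp': 'mean', 'pre': 'mean', 'tmp2015': 'mean', 'pre2015': 'mean',
          'year': 'first', 'har': 'first', 'irrigation1': 'first',
          'lnyield_obs': 'first', 'lnfer': 'first', 'group': 'first',
          'sample': 'first', 'zone': 'first'})
)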
# Load data from an Excel file
excel_file = "scenario_management.xlsx"
sheet_name = "maize"
data = pd.read_excel(excel_file, sheet_name=sheet_name)
# Assuming climate_zone is already loaded or defined
data = pd.merge(data, climate_zone, on='geoid', how='left')
# Create interaction terms
data['tmp_tmp_interaction'] = data['tmp'] ** 2
data['pre_pre_interaction'] = data['pre'] ** 2
data['irrigation12'] = data['irrigation1'] ** 2
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation12'] * data['tmp'] ** 2
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation12'] * data['pre'] ** 2
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] ** 2
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] ** 2
response_variable = 'lnyield'
# Function to perform regression analysis
def perform_regression(group_data, predictor_variables):
X = sm.add_constant(group_data[predictor_variables])
y = group_data[response_variable]
return sm.OLS(y, X).fit()
# Initialize a nested dictionary to store regression results
reg = {}
data_rows = []
sample_data = data[data['sample'] == 0].copy()
project_data = data.copy()
zone_ = [1, 2, 3]
group_ = [1, 2]
# Check the unique values in the 'zone' and 'group' columns
print("Unique zones:", sample_data['zone'].unique())
print("Unique groups:", sample_data['group'].unique())
# Iterate through groups and zones in sample_data
for zone in zone_:
reg[zone] = {}
for group in group_:
print(f"\nSubset for Zone {zone}, Group {group}:")
# Calculate y_temp_base based on regression parameters
tmp_values = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]['tmp'].values
pre_values = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]['pre'].values
lnfer_values = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]['lnfer'].values
irrigation1_values = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]['irrigation1'].values
if group == 1:
group_data1 = sample_data[(sample_data['zone'] == zone) & (sample_data['group'] == group)]
predictor_variables = ['lnfer', 'tmp', 'irrigation1', 'irrigation12', 'tmp_tmp_interaction',
'pre', 'pre_pre_interaction', 'lnfer_irrigation1_pre_interaction', 'lnfer_irrigation12_pre_pre_interaction',
'lnfer_irrigation1_tmp_interaction', 'lnfer_irrigation12_tmp_tmp_interaction']
print(f"\nSubset for Zone {zone}, Group {group}:")
reg_model = perform_regression(group_data1, predictor_variables)
print(f"Regression const for Zone {zone}, Group {group}: {reg_model.params['lnfer']}")
y_temp_base = (
reg_model.params['tmp_tmp_interaction'] * tmp_values**2 +
reg_model.params['tmp'] * tmp_values +
reg_model.params['pre_pre_interaction'] * pre_values**2 +
reg_model.params['pre'] * pre_values +
reg_model.params['lnfer'] * lnfer_values +
reg_model.params['irrigation1'] * irrigation1_values +
reg_model.params['irrigation12'] * irrigation1_values**2 +
reg_model.params['lnfer_irrigation1_tmp_interaction'] * irrigation1_values * tmp_values * lnfer_values +
reg_model.params['lnfer_irrigation12_tmp_tmp_interaction'] * irrigation1_values**2 * lnfer_values * tmp_values**2 +
reg_model.params['lnfer_irrigation1_pre_interaction'] * irrigation1_values * pre_values * lnfer_values +
reg_model.params['lnfer_irrigation12_pre_pre_interaction'] * irrigation1_values**2 * lnfer_values * pre_values**2 +
reg_model.params['const']
)
else:
group_data2 = sample_data[(sample_data['zone'] == zone) & (sample_data['group'] == group)]
predictor_variables = ['lnfer', 'tmp', 'tmp_tmp_interaction', 'irrigation1', 'irrigation12', 'pre', 'pre_pre_interaction',
'lnfer_tmp_interaction', 'lnfer_tmp_tmp_interaction',
'lnfer_pre_interaction', 'lnfer_pre_pre_interaction']
print(f"\nSubset for Zone {zone}, Group {group}:")
reg_model = perform_regression(group_data2, predictor_variables)
print(f"Regression const for Zone {zone}, Group {group}: {reg_model.params['lnfer']}")
y_temp_base = (
reg_model.params['tmp_tmp_interaction'] * tmp_values**2 +
reg_model.params['tmp'] * tmp_values +
reg_model.params['pre_pre_interaction'] * pre_values**2 +
reg_model.params['pre'] * pre_values +
reg_model.params['lnfer'] * lnfer_values +
reg_model.params['lnfer_tmp_interaction'] * tmp_values * lnfer_values +
reg_model.params['lnfer_tmp_tmp_interaction'] * lnfer_values * tmp_values**2 +
reg_model.params['lnfer_pre_interaction'] * pre_values * lnfer_values +
reg_model.params['lnfer_pre_pre_interaction'] * lnfer_values * pre_values**2 +
reg_model.params['const']
)
# Extract additional information
geoid_values = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]['geoid'].values
scenario_values = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]['scenario'].values
lnyield_obs_values = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]['lnyield_obs'].values
har_values = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]['har'].values
for tmp, pre, lnfer, irrigation1, y_temp_base_val, geoid, scenario, lnyield_obs, har in zip(
tmp_values, pre_values, lnfer_values, irrigation1_values, y_temp_base, geoid_values, scenario_values, lnyield_obs_values, har_values
):
data_rows.append([tmp, pre, lnfer, irrigation1, y_temp_base_val, geoid, scenario, lnyield_obs, har])
# Create a DataFrame from the data
results_base = pd.DataFrame(data_rows, columns=['tmp', 'pre', 'lnfer', 'irrigation1',
'y_temp_base', 'geoid', 'scenario',
'lnyield_obs', 'har'])
# Filter for specific scenarios
results_base_t = results_base[(results_base['scenario'] == 1.5) | (results_base['scenario'] == 3)]
# Rename the 'y_temp_base' column to 'y_temp_base_t'
results_base_t = results_base_t.rename(columns={'y_temp_base': 'y_temp_base_t'})
# Select relevant columns
results_base_t = results_base_t[['geoid', 'scenario', 'y_temp_base_t', 'lnyield_obs', 'har']]
# Remove duplicates based on 'geoid' and 'scenario' columns
results_base_t = results_base_t.drop_duplicates(subset=['geoid', 'scenario'])
print(results_base_t)
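The long term-by-term sums above just rebuild the fitted linear predictor. Because project_data in this cell is a copy of data and already carries the interaction columns, an equivalent shortcut (a sketch only, using the same names as inside the loop; project_subset is an illustrative name) is to let the fitted model evaluate the projection rows:

# Sketch: evaluate the fitted model on the projection rows for the current zone/group.
# Assumes project_subset has the same interaction columns that were used in fitting.
project_subset = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]
X_proj = sm.add_constant(project_subset[predictor_variables], has_constant='add')
y_temp_base = np.asarray(reg_model.predict(X_proj))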
[output] Unique zones: [3. 2. 1.]; Unique groups: [1 2]. Zone 1, Group 1 lnfer coefficient: 0.45802; the run then stopped in Zone 1, Group 2 with the ValueError below.
[output] ValueError traceback (statsmodels): the Zone 1, Group 2 regression was run through the formula interface with clustered standard errors, but cov_kwds={'groups': data['geoid']} supplied cluster labels from the full DataFrame rather than from group_data2, so the labels and the regression rows had different lengths ("The weights and list don't have the same length").
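The fix implied by the traceback is to take the cluster labels from the same subset that enters the regression; a sketch with the formula recorded in the traceback, assuming group_data2 has no missing values in the formula variables (otherwise the labels must first be aligned to the rows actually used):

formula = ('lnyield ~ lnfer + tmp + tmp_tmp_interaction + pre + pre_pre_interaction'
           ' + lnfer_tmp_interaction + lnfer_tmp_tmp_interaction'
           ' + lnfer_pre_interaction + lnfer_pre_pre_interaction + C(geoid)')
# Cluster labels must come from group_data2, not from the full `data`.
reg_model = sm.OLS.from_formula(formula, data=group_data2).fit(
    cov_type='cluster', cov_kwds={'groups': group_data2['geoid']})
# Note: with the formula interface the intercept is named 'Intercept', not 'const'.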
# Load data from an Excel file
excel_file = "scenario_management.xlsx"
sheet_name = "maize"
data = pd.read_excel(excel_file, sheet_name=sheet_name)
# Assuming climate_zone is already loaded or defined
data = pd.merge(data, climate_zone, on='geoid', how='left')
# Create interaction terms
data['tmp_tmp_interaction'] = data['tmp'] ** 2
data['pre_pre_interaction'] = data['pre'] ** 2
data['irrigation12'] = data['irrigation1'] ** 2
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation12'] * data['tmp'] ** 2
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation12'] * data['pre'] ** 2
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] ** 2
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] ** 2
response_variable = 'lnyield'
# Function to perform regression analysis
def perform_regression(group_data, predictor_variables):
X = sm.add_constant(group_data[predictor_variables])
y = group_data[response_variable]
return sm.OLS(y, X).fit()
# Initialize a nested dictionary to store regression results
reg = {}
data_rows = []
sample_data = data[data['sample'] == 0].copy()
project_data = data1
zone_ = [1, 2, 3]
group_ = [1, 2]
# Check the unique values in the 'zone' and 'group' columns
print("Unique zones:", sample_data['zone'].unique())
print("Unique groups:", sample_data['group'].unique())
# Iterate through groups and zones in sample_data
for zone in zone_:
reg[zone] = {}
for group in group_:
print(f"\nSubset for Zone {zone}, Group {group}:")
tmp_values = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]['tmp'].values
pre_values = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]['pre'].values
lnfer_values = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]['lnfer'].values
irrigation1_values = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]['irrigation1'].values
if group == 1:
print(f"\nSubset for Zone {zone}, Group {group}:")
group_data = sample_data[(sample_data['zone'] == zone) & (sample_data['group'] == group)]
predictor_variables = ['lnfer', 'tmp', 'irrigation1', 'irrigation12', 'tmp_tmp_interaction',
'pre', 'pre_pre_interaction', 'lnfer_irrigation1_pre_interaction', 'lnfer_irrigation12_pre_pre_interaction',
'lnfer_irrigation1_tmp_interaction', 'lnfer_irrigation12_tmp_tmp_interaction']
reg_model = perform_regression(group_data, predictor_variables)
print(f"Regression const for Zone {zone}, Group {group}: {reg_model.params['lnfer']}")
y_temp_base = (
reg_model.params['tmp_tmp_interaction'] * tmp_values**2 +
reg_model.params['tmp'] * tmp_values +
reg_model.params['pre_pre_interaction'] * pre_values**2 +
reg_model.params['pre'] * pre_values +
reg_model.params['lnfer'] * lnfer_values +
reg_model.params['irrigation1'] * irrigation1_values +
reg_model.params['irrigation12'] * irrigation1_values**2 +
reg_model.params['lnfer_irrigation1_tmp_interaction'] * irrigation1_values * tmp_values * lnfer_values +
reg_model.params['lnfer_irrigation12_tmp_tmp_interaction'] * irrigation1_values**2 * lnfer_values * tmp_values**2 +
reg_model.params['lnfer_irrigation1_pre_interaction'] * irrigation1_values * pre_values * lnfer_values +
reg_model.params['lnfer_irrigation12_pre_pre_interaction'] * irrigation1_values**2 * lnfer_values * pre_values**2 +
reg_model.params['const']
)
else:
group_data = sample_data[(sample_data['zone'] == zone) & (sample_data['group'] == group)]
predictor_variables = ['lnfer', 'tmp', 'tmp_tmp_interaction', 'irrigation1', 'irrigation12', 'pre', 'pre_pre_interaction',
'lnfer_tmp_interaction', 'lnfer_tmp_tmp_interaction',
'lnfer_pre_interaction', 'lnfer_pre_pre_interaction']
reg_model = perform_regression(group_data, predictor_variables)
print(f"\nSubset for Zone {zone}, Group {group}:")
print(f"Regression const for Zone {zone}, Group {group}: {reg_model.params['lnfer']}")
y_temp_base = (
reg_model.params['tmp_tmp_interaction'] * tmp_values**2 +
reg_model.params['tmp'] * tmp_values +
reg_model.params['pre_pre_interaction'] * pre_values**2 +
reg_model.params['pre'] * pre_values +
reg_model.params['lnfer'] * lnfer_values +
reg_model.params['lnfer_tmp_interaction'] * tmp_values * lnfer_values +
reg_model.params['lnfer_tmp_tmp_interaction'] * lnfer_values * tmp_values**2 +
reg_model.params['lnfer_pre_interaction'] * pre_values * lnfer_values +
reg_model.params['lnfer_pre_pre_interaction'] * lnfer_values * pre_values**2 +
reg_model.params['const']
)
geoid_values = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]['geoid'].values
scenario_values = project_data[(project_data['group'] == group) & (project_data['zone'] == zone)]['scenario'].values
for tmp, pre, lnfer, irrigation1,y_temp_base_val, geoid, scenario in zip(
tmp_values, pre_values, lnfer_values, irrigation1_values,
y_temp_base, geoid_values, scenario_values
):
data_rows.append([tmp, pre, lnfer, irrigation1, y_temp_base_val, geoid, scenario])
# Create a DataFrame from the data
results_base = pd.DataFrame(data_rows, columns=['tmp','pre','lnfer','irrigation1',
'y_temp_base', 'geoid', 'scenario'])
results_base = results_base[results_base['scenario'] == 2015].copy()
# Rename the 'y_temp_base' column to 'y_temp_base2015'
results_base.rename(columns={'y_temp_base': 'y_temp_base2015'}, inplace=True)
# Keep only the desired columns in results_2015
results_base2015 = results_base[['geoid', 'y_temp_base2015']]
print(results_base2015)
[output] Unique zones: [3. 2. 1.]; Unique groups: [1 2]. lnfer coefficients by zone and group: Zone 1: 0.45802 (Group 1), -0.14791 (Group 2); Zone 2: 0.45802, -0.14791; Zone 3: 0.45802, -0.14791. results_base2015: 2095 rows x 2 columns (geoid, y_temp_base2015), e.g. geoid 8001 -> 8.480.
data = results_base_t.merge(results_base, on='geoid', how='left')
data = data.drop(columns=['scenario_y'])
data.rename(columns={'scenario_x': 'scenario'}, inplace=True)
data = data.sort_values(by=['geoid', 'scenario'])
print(data)
data['yield_warming'] = np.exp(data['y_temp_base_t'] - data['y_temp_base2015'] + data['lnyield_obs'])
data['gap_yield_warming'] = (data['yield_warming'] - np.exp(data['lnyield_obs'])) / 1000  # tonne/ha
data['gap_production_warming'] = data['gap_yield_warming'] * data['har'] / 10000  # 10^4 tonne
data['yield_obs'] = np.exp(data['lnyield_obs']) / 1000  # tonne/ha
data['production_obs'] = data['yield_obs'] * data['har'] / 10000  # 10^4 tonne
print(data)
alldata = data
# Keep the county id, scenario, and the warming-driven yield and production gaps
output_df = data[['geoid', 'scenario', 'gap_yield_warming', 'gap_production_warming']]
# Define the output CSV file path
output_csv_file = "maize_tem.csv"
# Save the 'output_df' DataFrame to a CSV file
output_df.to_csv(output_csv_file, index=False)
print(f"Data saved to '{output_csv_file}'")
[output] merged projection table: 4190 rows (2095 counties x two warming scenarios), 10 columns before and 15 columns after adding yield_warming, gap_yield_warming, gap_production_warming, yield_obs, and production_obs. Data saved to 'maize_tem.csv'.
test = alldata[(alldata['scenario'] == 1.5) & (alldata['geoid'] == 1001)]
print(test)
[output] geoid 1001, scenario 1.5: y_temp_base_t 9.365828, y_temp_base2015 9.419198, lnyield_obs 9.093130, yield_warming 8431.748, gap_yield_warming -0.462 t/ha, gap_production_warming -0.0206 (10^4 t), yield_obs 8.894 t/ha, production_obs 0.397 (10^4 t).
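The projection works on log yields: exp(y_temp_base_t - y_temp_base2015 + lnyield_obs) equals the observed yield multiplied by the fitted warming-to-2015 ratio, so only the predicted change in log yield enters, not the level of the fit. A quick numeric check with the geoid 1001 row above:

# exp(a - b + c) = exp(c) * exp(a - b): observed yield scaled by the fitted ratio
y_t, y_2015, ln_obs = 9.365828, 9.419198, 9.093130
print(np.exp(y_t - y_2015 + ln_obs))          # ~8431.75, matching yield_warming above
print(np.exp(ln_obs) * np.exp(y_t - y_2015))  # same value, written as a scaling of the observed yield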
data = alldata[alldata['scenario'] == 3].copy()  # copy to avoid SettingWithCopyWarning on later edits
data1 = data[data['gap_production_warming'] > 0]
print(data)
print(data1)
print(np.min(data['gap_production_warming']),np.max(data['gap_production_warming']),np.mean(data['gap_production_warming']))
print(np.percentile(data['gap_production_warming'], 5),np.percentile(data['gap_production_warming'], 95))
print("sum of gap production", np.sum(data['gap_production_warming']))
print("sum of production total", np.sum(data['production_obs']))
print("sum of production total in warming", np.sum(data1['production_obs']))
# Clip 'gap_production_warming' to [-2.1, 2.1] for mapping
data.loc[data['gap_production_warming'] > 2.1, 'gap_production_warming'] = 2.1
data.loc[data['gap_production_warming'] < -2.1, 'gap_production_warming'] = -2.1
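# A more compact equivalent of the two .loc assignments above (alternative, not the
# original code): data['gap_production_warming'] = data['gap_production_warming'].clip(-2.1, 2.1)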
data['geoid'] = data['geoid'].astype(float)
# Load US county boundaries shapefile using geopandas
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_county_20m/surface_shapefile.shp'
gdf_county = gpd.read_file(shapefile_path)
# Convert the shapefile's 'GEOID' column to float so it matches the data's geoid
gdf_county['GEOID'] = gdf_county['GEOID'].astype(float)
# Rename the 'geoid' column to 'GEOID' to match the shapefile
data.rename(columns={'geoid': 'GEOID'}, inplace=True)
# Convert the 'GEOID' column in the 'data' DataFrame to float as well
data['GEOID'] = data['GEOID'].astype(float)
# Now, merge 'data' with county boundaries based on 'GEOID' attribute
merged_data = gdf_county.merge(data, on='GEOID', how='left')
# Filter the data to include only geometries within the specified longitude and latitude range
filtered_data = merged_data.cx[-130:-60, 25:50]
cmap = mcolors.ListedColormap(['white'] + list(plt.cm.Spectral(np.linspace(0, 1, 255))))
norm = mcolors.Normalize(vmin=-2.2, vmax=2.2)
# Plot both county boundaries and 'y_temp' values on the same map within the specified range
fig, ax = plt.subplots(figsize=(10, 4))
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_state_20m/cb_2018_us_state_20m.shp'
gdf_state = gpd.read_file(shapefile_path)
gdf_state.rename(columns={'GEOID': 'GEOID_state'}, inplace=True)
gdf_state = gdf_state.cx[-130:-60, 25:50]
gdf_state.boundary.plot(ax=ax, linewidth=0.3, color='grey')  # Plot state boundaries in grey
sc = plt.scatter([], [], c=[], cmap=cmap, norm=norm) # Create an empty ScalarMappable for the colorbar
# Define custom colorbar ticks
custom_ticks = [-2, -1, 0, 1, 2]
# Adjust the height of the colorbar by setting the shrink parameter
cbar = plt.colorbar(sc, ax=ax, label='Production change\n($10^{4}$ t)', orientation='vertical', pad=0.02, shrink=0.8, ticks=custom_ticks)
# Set the colorbar label text, font size, and padding
cbar.ax.set_ylabel('Production change\n($10^{4}$ tonnes/yr)', fontsize=16, fontfamily='Arial')
cbar.ax.tick_params(labelsize=16) # Adjust tick label fontsize
cbar.ax.set_yticklabels(custom_ticks, fontsize=16, fontfamily='Arial')
filtered_data.plot(column='gap_production_warming', cmap=cmap, ax=ax, norm=norm)
# Add labels, titles, or other plot configurations as needed
ax.set_xticks([]) # Remove x-axis ticks
ax.set_yticks([]) # Remove y-axis ticks
ax.set_frame_on(False) # Turn off the frame
plt.text(0.25, 0.2, '(c) Maize', transform=plt.gcf().transFigure,
fontsize=18, fontweight='bold', fontfamily='Arial', ha='left', va='bottom')
# Save the entire figure as a JPG with DPI 300
plt.savefig('maize_all_production_3.jpg', format='jpg', dpi=300, bbox_inches='tight', pad_inches=0.1)
# Show the plot
plt.show()
[output] scenario 3.0: 2095 county rows, of which 597 have a positive gap_production_warming. gap_production_warming: min -15.121, max 14.567, mean -0.448 (10^4 t); 5th/95th percentiles -3.548 / 1.028. Sum of gap production: -938.55 (10^4 t); sum of observed production: 33517.94 (10^4 t); sum of observed production in counties with a positive gap: 10222.01 (10^4 t).