import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import netCDF4 as nc
import pandas as pd
import os
import csv
from glob import glob
import xarray as xr
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from matplotlib.colors import LinearSegmentedColormap, Normalize
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import Normalize
import geopandas as gpd
# Step 1: Data Preparation
# Load your dataset into a pandas DataFrame
# NOTE(review): hard-coded absolute working directory — all relative paths
# below (e.g. "sensitivity.xlsx", saved .jpg files) resolve against it.
# Adjust when running on another machine.
os.chdir('/Users/chenchenren/postdoc/paper/2N and water-US/Figure 2 sensitivity/')
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
import geopandas as gpd
import numpy as np
# ---------------------------------------------------------------------------
# Maize: temperature sensitivity of yield.
# Fit a per-group OLS model of ln(yield) on fertilizer, temperature,
# irrigation and their interactions, then evaluate the analytic marginal
# effect of temperature (d lnyield / d tmp, scaled to percent) per county.
# ---------------------------------------------------------------------------
excel_file = "sensitivity.xlsx"
sheet_name = "maize"
data = pd.read_excel(excel_file, sheet_name=sheet_name)
# Interaction terms used as regressors.
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
# Predictor sets: group 1 uses irrigation interactions, other groups use
# fertilizer-temperature interactions.
predictor_variables1 = ['lnfer', 'tmp', 'irrigation1', 'irrigation12', 'tmp_tmp_interaction',
                        'lnfer_irrigation1_tmp_interaction', 'lnfer_irrigation12_tmp_tmp_interaction']
predictor_variables2 = ['lnfer', 'tmp', 'tmp_tmp_interaction', 'irrigation1',
                        'lnfer_tmp_interaction', 'lnfer_tmp_tmp_interaction']
response_variable = 'lnyield'
group_values = data['group'].unique()
group_dfs = []
for group in group_values:
    # .copy() so the y_temp assignment below does not trigger
    # SettingWithCopyWarning.
    group_data = data[data['group'] == group].copy()
    predictor_variables = predictor_variables1 if group == 1 else predictor_variables2
    # OLS fit for the current group.
    X = sm.add_constant(group_data[predictor_variables])
    y = group_data[response_variable]
    reg = sm.OLS(y, X).fit()
    # Marginal effect of temperature: analytic derivative of the fitted
    # model with respect to tmp, times 100 to express it in percent.
    if group == 1:
        group_data['y_temp'] = (reg.params['tmp_tmp_interaction'] * group_data['tmp'] * 2 +
                                reg.params['tmp'] +
                                reg.params['lnfer_irrigation1_tmp_interaction'] * group_data['lnfer'] * group_data['irrigation1'] +
                                reg.params['lnfer_irrigation12_tmp_tmp_interaction'] * group_data['irrigation12'] * group_data['lnfer'] * group_data['tmp'] * 2) * 100
    else:
        group_data['y_temp'] = (reg.params['tmp_tmp_interaction'] * group_data['tmp'] * 2 +
                                reg.params['tmp'] +
                                reg.params['lnfer_tmp_interaction'] * group_data['lnfer'] +
                                reg.params['lnfer_tmp_tmp_interaction'] * group_data['lnfer'] * group_data['tmp'] * 2) * 100
    group_dfs.append(group_data)
# Recombine all groups.
data = pd.concat(group_dfs)
# Keep the year 2020 only; .copy() avoids SettingWithCopyWarning on the
# astype assignment below.
year_2020_data = data[data['year'] == 2020].copy()
# geoid as string so it can later be matched against shapefile GEOIDs.
year_2020_data['geoid'] = year_2020_data['geoid'].astype(str)
print(year_2020_data)
geoid year tmp irrigation1 lnfer lnyield group \ 169 1053 2020 22.84545 0.0 6.406095 0.478777 2 396 1127 2020 21.43184 0.0 6.933648 0.939504 2 414 5001 2020 21.47970 0.0 6.214180 0.484029 2 425 5003 2020 22.11880 0.0 6.630030 0.687050 2 438 5017 2020 22.16659 0.0 6.416331 0.550745 2 ... ... ... ... ... ... ... ... 19459 55133 2020 15.17111 0.0 5.466717 0.263523 2 19472 55135 2020 13.45369 0.0 5.402363 0.227263 2 19485 55137 2020 14.05343 0.0 5.530668 0.306577 2 19497 55139 2020 14.36882 0.0 5.212457 0.112598 2 19510 55141 2020 13.37527 0.0 6.442788 0.840688 2 tmp_tmp_interaction irrigation12 lnfer_irrigation1_tmp_interaction \ 169 521.914586 0.0 0.0 396 459.323766 0.0 0.0 414 461.377512 0.0 0.0 425 489.241313 0.0 0.0 438 491.357712 0.0 0.0 ... ... ... ... 19459 230.162579 0.0 0.0 19472 181.001775 0.0 0.0 19485 197.498895 0.0 0.0 19497 206.462988 0.0 0.0 19510 178.897848 0.0 0.0 lnfer_irrigation12_tmp_tmp_interaction lnfer_tmp_interaction \ 169 0.0 146.350123 396 0.0 148.600835 414 0.0 133.478722 425 0.0 146.648308 438 0.0 142.228179 ... ... ... 19459 0.0 82.936165 19472 0.0 72.681717 19485 0.0 77.724856 19497 0.0 74.896856 19510 0.0 86.174029 lnfer_tmp_tmp_interaction y_temp 169 3343.434418 -10.141159 396 3184.789310 -8.909356 414 2867.082908 -7.985209 425 3243.684585 -9.513436 438 3152.713721 -9.219821 ... ... ... 19459 1258.233681 -0.925645 19472 977.837290 0.566673 19485 1092.300817 0.268444 19497 1076.179448 -0.631580 19510 1152.600906 3.039908 [697 rows x 14 columns]
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_5814/2188395441.py:67: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy year_2020_data['geoid'] = year_2020_data['geoid'].astype(str)
# ---------------------------------------------------------------------------
# Panel (a): map of maize yield sensitivity to temperature (y_temp, %) by
# US county for 2020.  Re-fits the per-group OLS models, merges the county
# estimates onto the county shapefile and saves 'maize_tmp.jpg'.
# ---------------------------------------------------------------------------
excel_file = "sensitivity.xlsx"
sheet_name = "maize"
data = pd.read_excel(excel_file, sheet_name=sheet_name)
# Interaction terms used as regressors.
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
predictor_variables1 = ['lnfer', 'tmp', 'irrigation1', 'irrigation12', 'tmp_tmp_interaction',
                        'lnfer_irrigation1_tmp_interaction', 'lnfer_irrigation12_tmp_tmp_interaction']
predictor_variables2 = ['lnfer', 'tmp', 'tmp_tmp_interaction', 'irrigation1',
                        'lnfer_tmp_interaction', 'lnfer_tmp_tmp_interaction']
response_variable = 'lnyield'
group_values = data['group'].unique()
group_dfs = []
for group in group_values:
    group_data = data[data['group'] == group].copy()  # copy avoids SettingWithCopyWarning
    predictor_variables = predictor_variables1 if group == 1 else predictor_variables2
    X = sm.add_constant(group_data[predictor_variables])
    y = group_data[response_variable]
    reg = sm.OLS(y, X).fit()
    # Analytic marginal effect of temperature (d lnyield / d tmp), in %.
    if group == 1:
        group_data['y_temp'] = (reg.params['tmp_tmp_interaction'] * group_data['tmp'] * 2 +
                                reg.params['tmp'] +
                                reg.params['lnfer_irrigation1_tmp_interaction'] * group_data['lnfer'] * group_data['irrigation1'] +
                                reg.params['lnfer_irrigation12_tmp_tmp_interaction'] * group_data['irrigation12'] * group_data['lnfer'] * group_data['tmp'] * 2) * 100
    else:
        group_data['y_temp'] = (reg.params['tmp_tmp_interaction'] * group_data['tmp'] * 2 +
                                reg.params['tmp'] +
                                reg.params['lnfer_tmp_interaction'] * group_data['lnfer'] +
                                reg.params['lnfer_tmp_tmp_interaction'] * group_data['lnfer'] * group_data['tmp'] * 2) * 100
    group_dfs.append(group_data)
data = pd.concat(group_dfs)
# Keep 2020 only; .copy() so astype/rename below don't raise
# SettingWithCopyWarning.
year_2020_data = data[data['year'] == 2020].copy()
year_2020_data['geoid'] = year_2020_data['geoid'].astype(str)
# County boundaries; GEOID must be str on both sides of the merge.
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_county_20m/surface_shapefile.shp'
gdf_county = gpd.read_file(shapefile_path)
gdf_county['GEOID'] = gdf_county['GEOID'].astype(str)
year_2020_data.rename(columns={'geoid': 'GEOID'}, inplace=True)
# Left-merge keeps all counties; counties without 2020 data get NaN y_temp.
merged_data = gdf_county.merge(year_2020_data, on='GEOID', how='left')
# Clip to the conterminous-US bounding box (lon -130..-60, lat 25..50).
filtered_data = merged_data.cx[-130:-60, 25:50]
# 256-entry colormap.  NOTE(review): the prepended 'white' entry maps to the
# *minimum* of the normalised range (-11), not to 0.
cmap = mcolors.ListedColormap(['white'] + list(plt.cm.RdYlGn(np.linspace(0, 1, 255))))
norm = mcolors.Normalize(vmin=-11, vmax=11)
# Clamp y_temp into the colorbar range.  BUG FIX: values below -11 were
# previously assigned +11, flipping strongly negative counties to the
# positive end of the color scale.
filtered_data.loc[filtered_data['y_temp'] > 11, 'y_temp'] = 11
filtered_data.loc[filtered_data['y_temp'] < -11, 'y_temp'] = -11
fig, ax = plt.subplots(figsize=(10, 4))
# State outlines for geographic context.
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_state_20m/cb_2018_us_state_20m.shp'
gdf_state = gpd.read_file(shapefile_path)
gdf_state.rename(columns={'GEOID': 'GEOID_state'}, inplace=True)
gdf_state = gdf_state.cx[-130:-60, 25:50]
gdf_state.boundary.plot(ax=ax, linewidth=0.3, color='grey')  # state boundaries in grey
# Empty ScalarMappable so the colorbar reflects cmap/norm without data.
sc = plt.scatter([], [], c=[], cmap=cmap, norm=norm)
custom_ticks = [-10, -5, 0, 5, 10]
cbar = plt.colorbar(sc, ax=ax, label='Yield change (%)', orientation='vertical', pad=-0.02, shrink=0.8, ticks=custom_ticks)
cbar.ax.set_ylabel('Yield change (%)', fontsize=16, fontfamily='Arial')
cbar.ax.tick_params(labelsize=16)  # tick label fontsize
# Choropleth of the temperature sensitivity.
filtered_data.plot(column='y_temp', cmap=cmap, ax=ax, norm=norm)
ax.set_xticks([])       # remove x-axis ticks
ax.set_yticks([])       # remove y-axis ticks
ax.set_frame_on(False)  # turn off the frame
plt.text(0.27, 0.22, '(a) Maize', transform=plt.gcf().transFigure,
         fontsize=18, fontweight='bold', fontfamily='Arial', ha='left', va='bottom')
# Save the figure as JPG at 300 dpi.
plt.savefig('maize_tmp.jpg', format='jpg', dpi=300, bbox_inches='tight', pad_inches=0.1)
plt.show()
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_5814/68807675.py:60: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy year_2020_data['geoid'] = year_2020_data['geoid'].astype(str) /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_5814/68807675.py:71: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy year_2020_data.rename(columns={'geoid': 'GEOID'}, inplace=True)
# ---------------------------------------------------------------------------
# Panel (b): map of maize yield sensitivity to precipitation (y_pre, %) by
# US county for 2020.  Same pipeline as panel (a) with 'pre' in place of
# 'tmp'; saves 'maize_pre.jpg'.
# ---------------------------------------------------------------------------
excel_file = "sensitivity.xlsx"
sheet_name = "maize_pre"
data = pd.read_excel(excel_file, sheet_name=sheet_name)
# Interaction terms used as regressors.
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']
predictor_variables1 = ['lnfer', 'pre', 'irrigation1', 'irrigation12', 'pre_pre_interaction',
                        'lnfer_irrigation1_pre_interaction', 'lnfer_irrigation12_pre_pre_interaction']
predictor_variables2 = ['lnfer', 'pre', 'pre_pre_interaction', 'irrigation1',
                        'lnfer_pre_interaction', 'lnfer_pre_pre_interaction']
response_variable = 'lnyield'
group_values = data['group'].unique()
group_dfs = []
for group in group_values:
    group_data = data[data['group'] == group].copy()  # copy avoids SettingWithCopyWarning
    predictor_variables = predictor_variables1 if group == 1 else predictor_variables2
    X = sm.add_constant(group_data[predictor_variables])
    y = group_data[response_variable]
    reg = sm.OLS(y, X).fit()
    # Analytic marginal effect of precipitation (d lnyield / d pre), in %.
    if group == 1:
        group_data['y_pre'] = (reg.params['pre_pre_interaction'] * group_data['pre'] * 2 +
                               reg.params['pre'] +
                               reg.params['lnfer_irrigation1_pre_interaction'] * group_data['lnfer'] * group_data['irrigation1'] +
                               reg.params['lnfer_irrigation12_pre_pre_interaction'] * group_data['irrigation12'] * group_data['lnfer'] * group_data['pre'] * 2) * 100
    else:
        group_data['y_pre'] = (reg.params['pre_pre_interaction'] * group_data['pre'] * 2 +
                               reg.params['pre'] +
                               reg.params['lnfer_pre_interaction'] * group_data['lnfer'] +
                               reg.params['lnfer_pre_pre_interaction'] * group_data['lnfer'] * group_data['pre'] * 2) * 100
    group_dfs.append(group_data)
data = pd.concat(group_dfs)
# Keep 2020 only; .copy() so astype/rename below don't raise
# SettingWithCopyWarning.
year_2020_data = data[data['year'] == 2020].copy()
year_2020_data['geoid'] = year_2020_data['geoid'].astype(str)
# Counties with a positive precipitation sensitivity (inspection only).
ds_pre = year_2020_data[year_2020_data['y_pre'] > 0]
print(year_2020_data)
print(ds_pre)
# County boundaries; GEOID must be str on both sides of the merge.
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_county_20m/surface_shapefile.shp'
gdf_county = gpd.read_file(shapefile_path)
gdf_county['GEOID'] = gdf_county['GEOID'].astype(str)
year_2020_data.rename(columns={'geoid': 'GEOID'}, inplace=True)
# Left-merge keeps all counties; counties without 2020 data get NaN y_pre.
merged_data = gdf_county.merge(year_2020_data, on='GEOID', how='left')
# Clip to the conterminous-US bounding box.
filtered_data = merged_data.cx[-130:-60, 25:50]
# 256-entry colormap.  NOTE(review): the prepended 'white' entry maps to the
# *minimum* of the normalised range (-11), not to 0.
cmap = mcolors.ListedColormap(['white'] + list(plt.cm.RdYlGn(np.linspace(0, 1, 255))))
norm = mcolors.Normalize(vmin=-11, vmax=11)
# Clamp y_pre into the colorbar range.  BUG FIX: values below -11 were
# previously assigned +11, flipping strongly negative counties to the
# positive end of the color scale.
filtered_data.loc[filtered_data['y_pre'] > 11, 'y_pre'] = 11
filtered_data.loc[filtered_data['y_pre'] < -11, 'y_pre'] = -11
fig, ax = plt.subplots(figsize=(10, 4))
# State outlines for geographic context.
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_state_20m/cb_2018_us_state_20m.shp'
gdf_state = gpd.read_file(shapefile_path)
gdf_state.rename(columns={'GEOID': 'GEOID_state'}, inplace=True)
gdf_state = gdf_state.cx[-130:-60, 25:50]
gdf_state.boundary.plot(ax=ax, linewidth=0.3, color='grey')  # state boundaries in grey
# Empty ScalarMappable so the colorbar reflects cmap/norm without data.
sc = plt.scatter([], [], c=[], cmap=cmap, norm=norm)
custom_ticks = [-10, -5, 0, 5, 10]
cbar = plt.colorbar(sc, ax=ax, label='Yield change (%)', orientation='vertical', pad=-0.02, shrink=0.8, ticks=custom_ticks)
cbar.ax.set_ylabel('Yield change (%)', fontsize=16, fontfamily='Arial')
cbar.ax.tick_params(labelsize=16)  # tick label fontsize
# Choropleth of the precipitation sensitivity.
filtered_data.plot(column='y_pre', cmap=cmap, ax=ax, norm=norm)
ax.set_xticks([])       # remove x-axis ticks
ax.set_yticks([])       # remove y-axis ticks
ax.set_frame_on(False)  # turn off the frame
plt.text(0.27, 0.22, '(b) Maize', transform=plt.gcf().transFigure,
         fontsize=18, fontweight='bold', fontfamily='Arial', ha='left', va='bottom')
# Save the figure as JPG at 300 dpi.
plt.savefig('maize_pre.jpg', format='jpg', dpi=300, bbox_inches='tight', pad_inches=0.1)
plt.show()
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_5814/3712000809.py:60: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy year_2020_data['geoid'] = year_2020_data['geoid'].astype(str) /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_5814/3712000809.py:73: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy year_2020_data.rename(columns={'geoid': 'GEOID'}, inplace=True)
geoid year pre irrigation1 lnfer lnyield group \ 5 1001 2020 10.057620 0.235228 6.432470 4.192275 1 17 1003 2020 12.874060 0.000427 5.481153 3.767513 1 26 1005 2020 9.962674 0.215025 6.248376 4.104936 1 33 1009 2020 10.261350 0.884062 6.445511 4.200965 1 40 1013 2020 10.629650 0.001124 6.965954 4.455058 1 ... ... ... ... ... ... ... ... 19459 55133 2020 7.534492 0.000000 5.466717 4.037972 2 19472 55135 2020 7.999479 0.000000 5.402363 4.023465 2 19485 55137 2020 7.889553 0.000000 5.530668 4.096843 2 19497 55139 2020 7.754859 0.000000 5.212457 3.893867 2 19510 55141 2020 7.967925 0.000000 6.442788 4.662855 2 pre_pre_interaction irrigation12 lnfer_irrigation1_pre_interaction \ 5 101.155720 5.533231e-02 15.218168 17 165.741421 1.827562e-07 0.030166 26 99.254873 4.623575e-02 13.385421 33 105.295304 7.815649e-01 58.471520 40 112.989459 1.262926e-06 0.083213 ... ... ... ... 19459 56.768570 0.000000e+00 0.000000 19472 63.991664 0.000000e+00 0.000000 19485 62.245047 0.000000e+00 0.000000 19497 60.137838 0.000000e+00 0.000000 19510 63.487829 0.000000e+00 0.000000 lnfer_irrigation12_pre_pre_interaction lnfer_pre_interaction \ 5 36.003688 64.695339 17 0.000166 70.564693 26 28.674570 62.250533 33 530.434069 66.139644 40 0.000994 74.045653 ... ... ... 19459 0.000000 41.188936 19472 0.000000 43.216089 19485 0.000000 43.634498 19497 0.000000 40.421869 19510 0.000000 51.335652 lnfer_pre_pre_interaction y_pre 5 650.681135 2.780291 17 908.454086 -1.935113 26 620.181768 2.981209 33 678.682039 1.695591 40 787.079375 2.054310 ... ... ... 
19459 310.337705 5.752193 19472 345.706199 5.038266 19485 344.256687 5.253525 19497 313.465895 5.321767 19510 409.038622 5.512467 [1629 rows x 14 columns] geoid year pre irrigation1 lnfer lnyield group \ 5 1001 2020 10.057620 0.235228 6.432470 4.192275 1 26 1005 2020 9.962674 0.215025 6.248376 4.104936 1 33 1009 2020 10.261350 0.884062 6.445511 4.200965 1 40 1013 2020 10.629650 0.001124 6.965954 4.455058 1 49 1015 2020 10.575430 0.701355 6.420520 4.195369 1 ... ... ... ... ... ... ... ... 19459 55133 2020 7.534492 0.000000 5.466717 4.037972 2 19472 55135 2020 7.999479 0.000000 5.402363 4.023465 2 19485 55137 2020 7.889553 0.000000 5.530668 4.096843 2 19497 55139 2020 7.754859 0.000000 5.212457 3.893867 2 19510 55141 2020 7.967925 0.000000 6.442788 4.662855 2 pre_pre_interaction irrigation12 lnfer_irrigation1_pre_interaction \ 5 101.155720 0.055332 15.218168 26 99.254873 0.046236 13.385421 33 105.295304 0.781565 58.471520 40 112.989459 0.000001 0.083213 49 111.839720 0.491899 47.621850 ... ... ... ... 19459 56.768570 0.000000 0.000000 19472 63.991664 0.000000 0.000000 19485 62.245047 0.000000 0.000000 19497 60.137838 0.000000 0.000000 19510 63.487829 0.000000 0.000000 lnfer_irrigation12_pre_pre_interaction lnfer_pre_interaction \ 5 36.003688 64.695339 26 28.674570 62.250533 33 530.434069 66.139644 40 0.000994 74.045653 49 353.217584 67.899760 ... ... ... 19459 0.000000 41.188936 19472 0.000000 43.216089 19485 0.000000 43.634498 19497 0.000000 40.421869 19510 0.000000 51.335652 lnfer_pre_pre_interaction y_pre 5 650.681135 2.780291 26 620.181768 2.981209 33 678.682039 1.695591 40 787.079375 2.054310 49 718.069157 1.333126 ... ... ... 19459 310.337705 5.752193 19472 345.706199 5.038266 19485 344.256687 5.253525 19497 313.465895 5.321767 19510 409.038622 5.512467 [1568 rows x 14 columns]
# ---------------------------------------------------------------------------
# Panel (c): map of soybean yield sensitivity to temperature (y_temp, %) by
# US county for 2020.  Same pipeline as panel (a) on the 'soybean' sheet,
# plotted with the PuOr colormap; saves 'soybean_tmp.jpg'.
# ---------------------------------------------------------------------------
excel_file = "sensitivity.xlsx"
sheet_name = "soybean"
data = pd.read_excel(excel_file, sheet_name=sheet_name)
# Interaction terms used as regressors.
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']
data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']
predictor_variables1 = ['lnfer', 'tmp', 'irrigation1', 'irrigation12', 'tmp_tmp_interaction',
                        'lnfer_irrigation1_tmp_interaction', 'lnfer_irrigation12_tmp_tmp_interaction']
predictor_variables2 = ['lnfer', 'tmp', 'tmp_tmp_interaction', 'irrigation1',
                        'lnfer_tmp_interaction', 'lnfer_tmp_tmp_interaction']
response_variable = 'lnyield'
group_values = data['group'].unique()
group_dfs = []
for group in group_values:
    group_data = data[data['group'] == group].copy()  # copy avoids SettingWithCopyWarning
    predictor_variables = predictor_variables1 if group == 1 else predictor_variables2
    X = sm.add_constant(group_data[predictor_variables])
    y = group_data[response_variable]
    reg = sm.OLS(y, X).fit()
    # Analytic marginal effect of temperature (d lnyield / d tmp), in %.
    if group == 1:
        group_data['y_temp'] = (reg.params['tmp_tmp_interaction'] * group_data['tmp'] * 2 +
                                reg.params['tmp'] +
                                reg.params['lnfer_irrigation1_tmp_interaction'] * group_data['lnfer'] * group_data['irrigation1'] +
                                reg.params['lnfer_irrigation12_tmp_tmp_interaction'] * group_data['irrigation12'] * group_data['lnfer'] * group_data['tmp'] * 2) * 100
    else:
        group_data['y_temp'] = (reg.params['tmp_tmp_interaction'] * group_data['tmp'] * 2 +
                                reg.params['tmp'] +
                                reg.params['lnfer_tmp_interaction'] * group_data['lnfer'] +
                                reg.params['lnfer_tmp_tmp_interaction'] * group_data['lnfer'] * group_data['tmp'] * 2) * 100
    group_dfs.append(group_data)
data = pd.concat(group_dfs)
# Keep 2020 only; .copy() so astype/rename below don't raise
# SettingWithCopyWarning.
year_2020_data = data[data['year'] == 2020].copy()
year_2020_data['geoid'] = year_2020_data['geoid'].astype(str)
# County boundaries; GEOID must be str on both sides of the merge.
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_county_20m/surface_shapefile.shp'
gdf_county = gpd.read_file(shapefile_path)
gdf_county['GEOID'] = gdf_county['GEOID'].astype(str)
year_2020_data.rename(columns={'geoid': 'GEOID'}, inplace=True)
# Left-merge keeps all counties; counties without 2020 data get NaN y_temp.
merged_data = gdf_county.merge(year_2020_data, on='GEOID', how='left')
# Clip to the conterminous-US bounding box.
filtered_data = merged_data.cx[-130:-60, 25:50]
# 256-entry colormap.  NOTE(review): the prepended 'white' entry maps to the
# *minimum* of the normalised range (-11), not to 0.
cmap = mcolors.ListedColormap(['white'] + list(plt.cm.PuOr(np.linspace(0, 1, 255))))
norm = mcolors.Normalize(vmin=-11, vmax=11)
# Clamp y_temp into the colorbar range.  BUG FIX: values below -11 were
# previously assigned +11, flipping strongly negative counties to the
# positive end of the color scale.
filtered_data.loc[filtered_data['y_temp'] > 11, 'y_temp'] = 11
filtered_data.loc[filtered_data['y_temp'] < -11, 'y_temp'] = -11
fig, ax = plt.subplots(figsize=(10, 4))
# State outlines for geographic context.
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_state_20m/cb_2018_us_state_20m.shp'
gdf_state = gpd.read_file(shapefile_path)
gdf_state.rename(columns={'GEOID': 'GEOID_state'}, inplace=True)
gdf_state = gdf_state.cx[-130:-60, 25:50]
gdf_state.boundary.plot(ax=ax, linewidth=0.3, color='grey')  # state boundaries in grey
# Empty ScalarMappable so the colorbar reflects cmap/norm without data.
sc = plt.scatter([], [], c=[], cmap=cmap, norm=norm)
custom_ticks = [-10, -5, 0, 5, 10]
cbar = plt.colorbar(sc, ax=ax, label='Yield change (%)', orientation='vertical', pad=-0.02, shrink=0.8, ticks=custom_ticks)
cbar.ax.set_ylabel('Yield change (%)', fontsize=16, fontfamily='Arial')
cbar.ax.tick_params(labelsize=16)  # tick label fontsize
# Choropleth of the temperature sensitivity.
filtered_data.plot(column='y_temp', cmap=cmap, ax=ax, norm=norm)
ax.set_xticks([])       # remove x-axis ticks
ax.set_yticks([])       # remove y-axis ticks
ax.set_frame_on(False)  # turn off the frame
plt.text(0.27, 0.22, '(c) Soybean', transform=plt.gcf().transFigure,
         fontsize=18, fontweight='bold', fontfamily='Arial', ha='left', va='bottom')
# Save the figure as JPG at 300 dpi.
plt.savefig('soybean_tmp.jpg', format='jpg', dpi=300, bbox_inches='tight', pad_inches=0.1)
plt.show()
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17107/1920904350.py:60: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy year_2020_data['geoid'] = year_2020_data['geoid'].astype(str) /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17107/1920904350.py:72: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy year_2020_data.rename(columns={'geoid': 'GEOID'}, inplace=True)
# ---------------------------------------------------------------------------
# Panel (d): map of soybean yield sensitivity to precipitation (y_pre, %) by
# US county for 2020.  Same pipeline as panel (b) on the 'soybean_pre'
# sheet, plotted with the PuOr colormap; saves 'soybean_pre.jpg'.
# ---------------------------------------------------------------------------
excel_file = "sensitivity.xlsx"
sheet_name = "soybean_pre"
data = pd.read_excel(excel_file, sheet_name=sheet_name)
# Interaction terms used as regressors.
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']
data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']
predictor_variables1 = ['lnfer', 'pre', 'irrigation1', 'irrigation12', 'pre_pre_interaction',
                        'lnfer_irrigation1_pre_interaction', 'lnfer_irrigation12_pre_pre_interaction']
predictor_variables2 = ['lnfer', 'pre', 'pre_pre_interaction', 'irrigation1',
                        'lnfer_pre_interaction', 'lnfer_pre_pre_interaction']
response_variable = 'lnyield'
group_values = data['group'].unique()
group_dfs = []
for group in group_values:
    group_data = data[data['group'] == group].copy()  # copy avoids SettingWithCopyWarning
    predictor_variables = predictor_variables1 if group == 1 else predictor_variables2
    X = sm.add_constant(group_data[predictor_variables])
    y = group_data[response_variable]
    reg = sm.OLS(y, X).fit()
    # Analytic marginal effect of precipitation (d lnyield / d pre), in %.
    if group == 1:
        group_data['y_pre'] = (reg.params['pre_pre_interaction'] * group_data['pre'] * 2 +
                               reg.params['pre'] +
                               reg.params['lnfer_irrigation1_pre_interaction'] * group_data['lnfer'] * group_data['irrigation1'] +
                               reg.params['lnfer_irrigation12_pre_pre_interaction'] * group_data['irrigation12'] * group_data['lnfer'] * group_data['pre'] * 2) * 100
    else:
        group_data['y_pre'] = (reg.params['pre_pre_interaction'] * group_data['pre'] * 2 +
                               reg.params['pre'] +
                               reg.params['lnfer_pre_interaction'] * group_data['lnfer'] +
                               reg.params['lnfer_pre_pre_interaction'] * group_data['lnfer'] * group_data['pre'] * 2) * 100
    group_dfs.append(group_data)
data = pd.concat(group_dfs)
# Keep 2020 only; .copy() so astype/rename below don't raise
# SettingWithCopyWarning.
year_2020_data = data[data['year'] == 2020].copy()
year_2020_data['geoid'] = year_2020_data['geoid'].astype(str)
# County boundaries; GEOID must be str on both sides of the merge.
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_county_20m/surface_shapefile.shp'
gdf_county = gpd.read_file(shapefile_path)
gdf_county['GEOID'] = gdf_county['GEOID'].astype(str)
year_2020_data.rename(columns={'geoid': 'GEOID'}, inplace=True)
# Left-merge keeps all counties; counties without 2020 data get NaN y_pre.
merged_data = gdf_county.merge(year_2020_data, on='GEOID', how='left')
# Clip to the conterminous-US bounding box.
filtered_data = merged_data.cx[-130:-60, 25:50]
# 256-entry colormap.  NOTE(review): the prepended 'white' entry maps to the
# *minimum* of the normalised range (-11), not to 0.
cmap = mcolors.ListedColormap(['white'] + list(plt.cm.PuOr(np.linspace(0, 1, 255))))
norm = mcolors.Normalize(vmin=-11, vmax=11)
# Clamp y_pre into the colorbar range.  BUG FIX: values below -11 were
# previously assigned +11, flipping strongly negative counties to the
# positive end of the color scale.
filtered_data.loc[filtered_data['y_pre'] > 11, 'y_pre'] = 11
filtered_data.loc[filtered_data['y_pre'] < -11, 'y_pre'] = -11
fig, ax = plt.subplots(figsize=(10, 4))
# State outlines for geographic context.
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_state_20m/cb_2018_us_state_20m.shp'
gdf_state = gpd.read_file(shapefile_path)
gdf_state.rename(columns={'GEOID': 'GEOID_state'}, inplace=True)
gdf_state = gdf_state.cx[-130:-60, 25:50]
gdf_state.boundary.plot(ax=ax, linewidth=0.3, color='grey')  # state boundaries in grey
# Empty ScalarMappable so the colorbar reflects cmap/norm without data.
sc = plt.scatter([], [], c=[], cmap=cmap, norm=norm)
custom_ticks = [-10, -5, 0, 5, 10]
cbar = plt.colorbar(sc, ax=ax, label='Yield change (%)', orientation='vertical', pad=-0.02, shrink=0.8, ticks=custom_ticks)
cbar.ax.set_ylabel('Yield change (%)', fontsize=16, fontfamily='Arial')
cbar.ax.tick_params(labelsize=16)  # tick label fontsize
# Choropleth of the precipitation sensitivity.
filtered_data.plot(column='y_pre', cmap=cmap, ax=ax, norm=norm)
ax.set_xticks([])       # remove x-axis ticks
ax.set_yticks([])       # remove y-axis ticks
ax.set_frame_on(False)  # turn off the frame
plt.text(0.27, 0.22, '(d) Soybean', transform=plt.gcf().transFigure,
         fontsize=18, fontweight='bold', fontfamily='Arial', ha='left', va='bottom')
# Save the figure as JPG at 300 dpi.
plt.savefig('soybean_pre.jpg', format='jpg', dpi=300, bbox_inches='tight', pad_inches=0.1)
plt.show()
/var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17107/3746002272.py:60: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy year_2020_data['geoid'] = year_2020_data['geoid'].astype(str) /var/folders/vd/0_phd7hx2n51y4412862zww00000gp/T/ipykernel_17107/3746002272.py:71: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy year_2020_data.rename(columns={'geoid': 'GEOID'}, inplace=True)