In [1]:
import os
import csv
from glob import glob

import numpy as np
import pandas as pd
import xarray as xr
import netCDF4 as nc
import statsmodels.api as sm
import geopandas as gpd

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.ticker as ticker
from matplotlib.colors import LinearSegmentedColormap, Normalize
from matplotlib.gridspec import GridSpec
from mpl_toolkits.axes_grid1 import make_axes_locatable
In [2]:
# Step 1: Data preparation
# Set the working directory that holds the sensitivity workbook and output figures
os.chdir('/Users/chenchenren/postdoc/paper/2N and water-US/Figure 2 sensitivity/')
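Changing the working directory with os.chdir makes the relative paths used later (for example sensitivity.xlsx) depend on which cells have already run. A minimal alternative, sketched here with a hypothetical BASE_DIR variable that is not part of the original notebook, keeps the project location in one place and builds paths from it:

from pathlib import Path

# Hypothetical base-path variable (assumption, not in the original notebook)
BASE_DIR = Path('/Users/chenchenren/postdoc/paper/2N and water-US/Figure 2 sensitivity')
excel_path = BASE_DIR / 'sensitivity.xlsx'  # could be passed to pd.read_excel in the cells below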
In [3]:
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
import geopandas as gpd
import numpy as np

# Load your data
excel_file = "sensitivity.xlsx"
sheet_name = "maize"
data = pd.read_excel(excel_file, sheet_name=sheet_name)

# Create interaction terms
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer']* data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']

data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']

# Define predictor variables and response variable
predictor_variables1 = ['lnfer', 'tmp', 'irrigation1', 'irrigation12', 'tmp_tmp_interaction',
                       'lnfer_irrigation1_tmp_interaction', 'lnfer_irrigation12_tmp_tmp_interaction']

predictor_variables2 = ['lnfer', 'tmp', 'tmp_tmp_interaction', 'irrigation1',
                       'lnfer_tmp_interaction', 'lnfer_tmp_tmp_interaction']

response_variable = 'lnyield'

# Get unique group values
group_values = data['group'].unique()

# Initialize an empty list to store data for each group
group_dfs = []
for group_idx, group in enumerate(group_values):
    # Filter data for the current group
    group_data = data[data['group'] == group].copy()  # Make a copy to avoid warnings
    
    # Select predictor variables based on the group
    predictor_variables = predictor_variables1 if group == 1 else predictor_variables2
    
    # Perform regression analysis for the current group
    X = sm.add_constant(group_data[predictor_variables])
    y = group_data[response_variable]
    reg = sm.OLS(y, X).fit()
    
    group_data = data[data['group'] == group].copy()
    if group == 1:
        group_data['y_temp'] =  (reg.params['tmp_tmp_interaction'] * group_data['tmp']*2 +
                          reg.params['tmp']  +
                          reg.params['lnfer_irrigation1_tmp_interaction'] * group_data['lnfer'] * group_data['irrigation1'] +
                          reg.params['lnfer_irrigation12_tmp_tmp_interaction'] * group_data['irrigation12'] * group_data['lnfer'] * group_data['tmp']*2)*100
    else:
        group_data['y_temp'] = (reg.params['tmp_tmp_interaction'] * group_data['tmp']*2 +
            reg.params['tmp'] + reg.params['lnfer_tmp_interaction'] * group_data['lnfer'] +
            reg.params['lnfer_tmp_tmp_interaction'] * group_data['lnfer'] * group_data['tmp']*2
        )*100
    group_dfs.append(group_data)  # append every group, not only group 2

# Concatenate the dataframes for all groups
data = pd.concat(group_dfs)

# Filter data for the year 2020 (copy to avoid SettingWithCopyWarning)
year_2020_data = data[data['year'] == 2020].copy()
# Convert the 'geoid' column to string so it can later be matched against shapefile GEOIDs
year_2020_data['geoid'] = year_2020_data['geoid'].astype(str)

# Now 'y_temp' should be present in year_2020_data
print(year_2020_data)
       geoid  year       tmp  irrigation1     lnfer   lnyield  group  \
169     1053  2020  22.84545          0.0  6.406095  0.478777      2   
396     1127  2020  21.43184          0.0  6.933648  0.939504      2   
414     5001  2020  21.47970          0.0  6.214180  0.484029      2   
425     5003  2020  22.11880          0.0  6.630030  0.687050      2   
438     5017  2020  22.16659          0.0  6.416331  0.550745      2   
...      ...   ...       ...          ...       ...       ...    ...   
19459  55133  2020  15.17111          0.0  5.466717  0.263523      2   
19472  55135  2020  13.45369          0.0  5.402363  0.227263      2   
19485  55137  2020  14.05343          0.0  5.530668  0.306577      2   
19497  55139  2020  14.36882          0.0  5.212457  0.112598      2   
19510  55141  2020  13.37527          0.0  6.442788  0.840688      2   

       tmp_tmp_interaction  irrigation12  lnfer_irrigation1_tmp_interaction  \
169             521.914586           0.0                                0.0   
396             459.323766           0.0                                0.0   
414             461.377512           0.0                                0.0   
425             489.241313           0.0                                0.0   
438             491.357712           0.0                                0.0   
...                    ...           ...                                ...   
19459           230.162579           0.0                                0.0   
19472           181.001775           0.0                                0.0   
19485           197.498895           0.0                                0.0   
19497           206.462988           0.0                                0.0   
19510           178.897848           0.0                                0.0   

       lnfer_irrigation12_tmp_tmp_interaction  lnfer_tmp_interaction  \
169                                       0.0             146.350123   
396                                       0.0             148.600835   
414                                       0.0             133.478722   
425                                       0.0             146.648308   
438                                       0.0             142.228179   
...                                       ...                    ...   
19459                                     0.0              82.936165   
19472                                     0.0              72.681717   
19485                                     0.0              77.724856   
19497                                     0.0              74.896856   
19510                                     0.0              86.174029   

       lnfer_tmp_tmp_interaction     y_temp  
169                  3343.434418 -10.141159  
396                  3184.789310  -8.909356  
414                  2867.082908  -7.985209  
425                  3243.684585  -9.513436  
438                  3152.713721  -9.219821  
...                          ...        ...  
19459                1258.233681  -0.925645  
19472                 977.837290   0.566673  
19485                1092.300817   0.268444  
19497                1076.179448  -0.631580  
19510                1152.600906   3.039908  

[697 rows x 14 columns]
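The 'y_temp' column computed above is the marginal effect of temperature on log yield implied by each group's fitted model, scaled to percent. For group 1 the specification is lnyield ~ lnfer + tmp + irrigation1 + irrigation12 + tmp_tmp_interaction + lnfer_irrigation1_tmp_interaction + lnfer_irrigation12_tmp_tmp_interaction, so differentiating with respect to tmp gives the expression used in the loop:

    d(lnyield)/d(tmp) = params['tmp'] + 2*params['tmp_tmp_interaction']*tmp
                        + params['lnfer_irrigation1_tmp_interaction']*lnfer*irrigation1
                        + 2*params['lnfer_irrigation12_tmp_tmp_interaction']*lnfer*irrigation1**2*tmp

and for group 2, whose specification has no irrigation terms interacting with temperature:

    d(lnyield)/d(tmp) = params['tmp'] + 2*params['tmp_tmp_interaction']*tmp
                        + params['lnfer_tmp_interaction']*lnfer
                        + 2*params['lnfer_tmp_tmp_interaction']*lnfer*tmp

Multiplying by 100 converts the change in log yield into an approximate percentage yield change per unit increase in tmp (growing-season temperature, apparently in °C). The precipitation cells below compute 'y_pre' in exactly the same way, with 'pre' in place of 'tmp'.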
In [5]:
# Load your data
excel_file = "sensitivity.xlsx"
sheet_name = "maize"
data = pd.read_excel(excel_file, sheet_name=sheet_name)

# Create interaction terms
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer']* data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']

data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']

# Define predictor variables and response variable
predictor_variables1 = ['lnfer', 'tmp', 'irrigation1', 'irrigation12', 'tmp_tmp_interaction',
                       'lnfer_irrigation1_tmp_interaction', 'lnfer_irrigation12_tmp_tmp_interaction']

predictor_variables2 = ['lnfer', 'tmp', 'tmp_tmp_interaction', 'irrigation1',
                       'lnfer_tmp_interaction', 'lnfer_tmp_tmp_interaction']

response_variable = 'lnyield'

# Get unique group values
group_values = data['group'].unique()

# Initialize an empty list to store data for each group
group_dfs = []
for group_idx, group in enumerate(group_values):
    # Filter data for the current group
    group_data = data[data['group'] == group].copy()  # Make a copy to avoid warnings
    
    # Select predictor variables based on the group
    predictor_variables = predictor_variables1 if group == 1 else predictor_variables2
    
    # Perform regression analysis for the current group
    X = sm.add_constant(group_data[predictor_variables])
    y = group_data[response_variable]
    reg = sm.OLS(y, X).fit()
    
    group_data = data[data['group'] == group].copy()
    if group == 1:
        group_data['y_temp'] =  (reg.params['tmp_tmp_interaction'] * group_data['tmp']*2 +
                          reg.params['tmp']  +
                          reg.params['lnfer_irrigation1_tmp_interaction'] * group_data['lnfer'] * group_data['irrigation1'] +
                          reg.params['lnfer_irrigation12_tmp_tmp_interaction'] * group_data['irrigation12'] * group_data['lnfer'] * group_data['tmp']*2)*100
    else:
        group_data['y_temp'] = (reg.params['tmp_tmp_interaction'] * group_data['tmp']*2 +
            reg.params['tmp'] + reg.params['lnfer_tmp_interaction'] * group_data['lnfer'] +
            reg.params['lnfer_tmp_tmp_interaction'] * group_data['lnfer'] * group_data['tmp']*2
        )*100
    group_dfs.append(group_data)

# Concatenate the dataframes for all groups
data = pd.concat(group_dfs)

# Filter data for the year 2020 (copy to avoid SettingWithCopyWarning)
year_2020_data = data[data['year'] == 2020].copy()
# Convert the 'geoid' column to string so it can be matched against shapefile GEOIDs
year_2020_data['geoid'] = year_2020_data['geoid'].astype(str)


# Load US county boundaries shapefile using geopandas
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_county_20m/surface_shapefile.shp'
gdf_county = gpd.read_file(shapefile_path)

# Ensure 'GEOID' column in the shapefile has the 'object' data type
gdf_county['GEOID'] = gdf_county['GEOID'].astype(str)

# Rename the 'geoid' column to 'GEOID' in year_2020_data
year_2020_data.rename(columns={'geoid': 'GEOID'}, inplace=True)

# Now, merge 'year_2020_data' with county boundaries based on 'GEOID' attribute
merged_data = gdf_county.merge(year_2020_data, on='GEOID', how='left')

# Filter the data to include only geometries within the specified longitude and latitude range
filtered_data = merged_data.cx[-130:-60, 25:50]

# Prepend white to the RdYlGn colormap (the lowest bin renders white) and normalize to the ±11% range
cmap = mcolors.ListedColormap(['white'] + list(plt.cm.RdYlGn(np.linspace(0, 1, 255))))
norm = mcolors.Normalize(vmin=-11, vmax=11)
# Clip 'y_temp' to the colorbar range
filtered_data.loc[filtered_data['y_temp'] > 11, 'y_temp'] = 11
filtered_data.loc[filtered_data['y_temp'] < -11, 'y_temp'] = -11


# Plot state boundaries and county-level 'y_temp' values on the same map within the specified range
fig, ax = plt.subplots(figsize=(10, 4))
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_state_20m/cb_2018_us_state_20m.shp'
gdf_state = gpd.read_file(shapefile_path)
gdf_state.rename(columns={'GEOID': 'GEOID_state'}, inplace=True)
gdf_state = gdf_state.cx[-130:-60, 25:50]
gdf_state.boundary.plot(ax=ax, linewidth=0.3, color='grey')  # Plot state boundaries in grey

sc = plt.scatter([], [], c=[], cmap=cmap, norm=norm)  # Create an empty ScalarMappable for the colorbar

# Define custom colorbar ticks
custom_ticks = [-10, -5, 0, 5, 10]

# Adjust the height of the colorbar by setting the shrink parameter
cbar = plt.colorbar(sc, ax=ax, label='Yield change (%)', orientation='vertical',  pad=-0.02, shrink=0.8, ticks=custom_ticks)

# Set the colorbar label text, font size, and tick label size
cbar.ax.set_ylabel('Yield change (%)', fontsize=16, fontfamily='Arial')
cbar.ax.tick_params(labelsize=16)  # Adjust tick label fontsize

filtered_data.plot(column='y_temp', cmap=cmap, ax=ax, norm=norm)

# Add labels, titles, or other plot configurations as needed
ax.set_xticks([])  # Remove x-axis ticks
ax.set_yticks([])  # Remove y-axis ticks
ax.set_frame_on(False)  # Turn off the frame
plt.text(0.27, 0.22, '(a) Maize', transform=plt.gcf().transFigure,
         fontsize=18, fontweight='bold', fontfamily='Arial', ha='left', va='bottom')


# Save the entire figure as a JPG with DPI 300
plt.savefig('maize_tmp.jpg', format='jpg', dpi=300, bbox_inches='tight', pad_inches=0.1)

# Show the plot
plt.show()
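The maize-precipitation and soybean cells below repeat this regression-and-mapping pipeline with different sheet names, climate variables, and colormaps. A helper along the following lines could replace the duplicated regression blocks. This is only a sketch: the function name, the climate_var parameter, and the shortened interaction-column names are illustrative and simply assume the column scheme used above (pd and sm as imported in the first cell).

def fit_marginal_effect(df, climate_var):
    # Return df with a 'y_<climate_var>' column: marginal effect of climate_var on lnyield, in percent
    df = df.copy()
    df[f'{climate_var}2'] = df[climate_var] ** 2
    df['irrigation12'] = df['irrigation1'] ** 2
    df[f'lnfer_irr_{climate_var}'] = df['lnfer'] * df['irrigation1'] * df[climate_var]
    df[f'lnfer_irr2_{climate_var}2'] = df['lnfer'] * df['irrigation12'] * df[climate_var] ** 2
    df[f'lnfer_{climate_var}'] = df['lnfer'] * df[climate_var]
    df[f'lnfer_{climate_var}2'] = df['lnfer'] * df[climate_var] ** 2

    out = []
    for group, g in df.groupby('group'):
        if group == 1:
            predictors = ['lnfer', climate_var, 'irrigation1', 'irrigation12', f'{climate_var}2',
                          f'lnfer_irr_{climate_var}', f'lnfer_irr2_{climate_var}2']
        else:
            predictors = ['lnfer', climate_var, f'{climate_var}2', 'irrigation1',
                          f'lnfer_{climate_var}', f'lnfer_{climate_var}2']
        reg = sm.OLS(g['lnyield'], sm.add_constant(g[predictors])).fit()
        p = reg.params
        if group == 1:
            effect = (p[climate_var] + 2 * p[f'{climate_var}2'] * g[climate_var]
                      + p[f'lnfer_irr_{climate_var}'] * g['lnfer'] * g['irrigation1']
                      + 2 * p[f'lnfer_irr2_{climate_var}2'] * g['lnfer'] * g['irrigation12'] * g[climate_var])
        else:
            effect = (p[climate_var] + 2 * p[f'{climate_var}2'] * g[climate_var]
                      + p[f'lnfer_{climate_var}'] * g['lnfer']
                      + 2 * p[f'lnfer_{climate_var}2'] * g['lnfer'] * g[climate_var])
        g = g.copy()
        g[f'y_{climate_var}'] = effect * 100
        out.append(g)
    return pd.concat(out)

# Example (hypothetical call): reproduce the maize temperature sensitivities
# maize_tmp = fit_marginal_effect(pd.read_excel('sensitivity.xlsx', sheet_name='maize'), 'tmp')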
In [6]:
# Load your data
excel_file = "sensitivity.xlsx"
sheet_name = "maize_pre"
data = pd.read_excel(excel_file, sheet_name=sheet_name)

# Create interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer']* data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']

data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']

# Define predictor variables and response variable
predictor_variables1 = ['lnfer', 'pre', 'irrigation1', 'irrigation12', 'pre_pre_interaction',
                       'lnfer_irrigation1_pre_interaction', 'lnfer_irrigation12_pre_pre_interaction']

predictor_variables2 = ['lnfer', 'pre', 'pre_pre_interaction', 'irrigation1',
                       'lnfer_pre_interaction', 'lnfer_pre_pre_interaction']

response_variable = 'lnyield'

# Get unique group values
group_values = data['group'].unique()

# Initialize an empty list to store data for each group
group_dfs = []
for group_idx, group in enumerate(group_values):
    # Filter data for the current group
    group_data = data[data['group'] == group].copy()  # Make a copy to avoid warnings
    
    # Select predictor variables based on the group
    predictor_variables = predictor_variables1 if group == 1 else predictor_variables2
    
    # Perform regression analysis for the current group
    X = sm.add_constant(group_data[predictor_variables])
    y = group_data[response_variable]
    reg = sm.OLS(y, X).fit()
    
    group_data = data[data['group'] == group].copy()
    if group == 1:
        group_data['y_pre'] =  (reg.params['pre_pre_interaction'] * group_data['pre']*2 +
                          reg.params['pre']  +
                          reg.params['lnfer_irrigation1_pre_interaction'] * group_data['lnfer'] * group_data['irrigation1'] +
                          reg.params['lnfer_irrigation12_pre_pre_interaction'] * group_data['irrigation12'] * group_data['lnfer'] * group_data['pre']*2)*100
    else:
        group_data['y_pre'] = (reg.params['pre_pre_interaction'] * group_data['pre']*2 +
            reg.params['pre'] + reg.params['lnfer_pre_interaction'] * group_data['lnfer'] +
            reg.params['lnfer_pre_pre_interaction'] * group_data['lnfer'] * group_data['pre']*2
        )*100
    group_dfs.append(group_data)

# Concatenate the dataframes for all groups
data = pd.concat(group_dfs)

# Filter data for the year 2020 (copy to avoid SettingWithCopyWarning)
year_2020_data = data[data['year'] == 2020].copy()
# Convert the 'geoid' column to string so it can be matched against shapefile GEOIDs
year_2020_data['geoid'] = year_2020_data['geoid'].astype(str)
# Counties with a positive precipitation response
ds_pre = year_2020_data[year_2020_data['y_pre'] > 0]
print(year_2020_data)
print(ds_pre)

# Load US county boundaries shapefile using geopandas
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_county_20m/surface_shapefile.shp'
gdf_county = gpd.read_file(shapefile_path)

# Ensure 'GEOID' column in the shapefile has the 'object' data type
gdf_county['GEOID'] = gdf_county['GEOID'].astype(str)

# Rename the 'geoid' column to 'GEOID' in year_2020_data
year_2020_data.rename(columns={'geoid': 'GEOID'}, inplace=True)

# Now, merge 'year_2020_data' with county boundaries based on 'GEOID' attribute
merged_data = gdf_county.merge(year_2020_data, on='GEOID', how='left')

# Filter the data to include only geometries within the specified longitude and latitude range
filtered_data = merged_data.cx[-130:-60, 25:50]

# Prepend white to the RdYlGn colormap (the lowest bin renders white) and normalize to the ±11% range
cmap = mcolors.ListedColormap(['white'] + list(plt.cm.RdYlGn(np.linspace(0, 1, 255))))
norm = mcolors.Normalize(vmin=-11, vmax=11)
# Clip 'y_pre' to the colorbar range
filtered_data.loc[filtered_data['y_pre'] > 11, 'y_pre'] = 11
filtered_data.loc[filtered_data['y_pre'] < -11, 'y_pre'] = -11

# Plot state boundaries and county-level 'y_pre' values on the same map within the specified range
fig, ax = plt.subplots(figsize=(10, 4))
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_state_20m/cb_2018_us_state_20m.shp'
gdf_state = gpd.read_file(shapefile_path)
gdf_state.rename(columns={'GEOID': 'GEOID_state'}, inplace=True)
gdf_state = gdf_state.cx[-130:-60, 25:50]
gdf_state.boundary.plot(ax=ax, linewidth=0.3, color='grey')  # Plot state boundaries in grey
sc = plt.scatter([], [], c=[], cmap=cmap, norm=norm)  # Create an empty ScalarMappable for the colorbar

# Define custom colorbar ticks
custom_ticks = [-10, -5, 0, 5, 10]

# Adjust the height of the colorbar by setting the shrink parameter
cbar = plt.colorbar(sc, ax=ax, label='Yield change (%)', orientation='vertical', pad=-0.02, shrink=0.8,  ticks=custom_ticks)

# Set the colorbar label text, font size, and tick label size
cbar.ax.set_ylabel('Yield change (%)', fontsize=16, fontfamily='Arial')
cbar.ax.tick_params(labelsize=16)  # Adjust tick label fontsize

filtered_data.plot(column='y_pre', cmap=cmap, ax=ax, norm=norm)

# Add labels, titles, or other plot configurations as needed
ax.set_xticks([])  # Remove x-axis ticks
ax.set_yticks([])  # Remove y-axis ticks
ax.set_frame_on(False)  # Turn off the frame
plt.text(0.27, 0.22, '(b) Maize', transform=plt.gcf().transFigure,
         fontsize=18, fontweight='bold', fontfamily='Arial', ha='left', va='bottom')

# Save the entire figure as a JPG with DPI 300
plt.savefig('maize_pre.jpg', format='jpg', dpi=300, bbox_inches='tight', pad_inches=0.1)

# Show the plot
plt.show()
       geoid  year        pre  irrigation1     lnfer   lnyield  group  \
5       1001  2020  10.057620     0.235228  6.432470  4.192275      1   
17      1003  2020  12.874060     0.000427  5.481153  3.767513      1   
26      1005  2020   9.962674     0.215025  6.248376  4.104936      1   
33      1009  2020  10.261350     0.884062  6.445511  4.200965      1   
40      1013  2020  10.629650     0.001124  6.965954  4.455058      1   
...      ...   ...        ...          ...       ...       ...    ...   
19459  55133  2020   7.534492     0.000000  5.466717  4.037972      2   
19472  55135  2020   7.999479     0.000000  5.402363  4.023465      2   
19485  55137  2020   7.889553     0.000000  5.530668  4.096843      2   
19497  55139  2020   7.754859     0.000000  5.212457  3.893867      2   
19510  55141  2020   7.967925     0.000000  6.442788  4.662855      2   

       pre_pre_interaction  irrigation12  lnfer_irrigation1_pre_interaction  \
5               101.155720  5.533231e-02                          15.218168   
17              165.741421  1.827562e-07                           0.030166   
26               99.254873  4.623575e-02                          13.385421   
33              105.295304  7.815649e-01                          58.471520   
40              112.989459  1.262926e-06                           0.083213   
...                    ...           ...                                ...   
19459            56.768570  0.000000e+00                           0.000000   
19472            63.991664  0.000000e+00                           0.000000   
19485            62.245047  0.000000e+00                           0.000000   
19497            60.137838  0.000000e+00                           0.000000   
19510            63.487829  0.000000e+00                           0.000000   

       lnfer_irrigation12_pre_pre_interaction  lnfer_pre_interaction  \
5                                   36.003688              64.695339   
17                                   0.000166              70.564693   
26                                  28.674570              62.250533   
33                                 530.434069              66.139644   
40                                   0.000994              74.045653   
...                                       ...                    ...   
19459                                0.000000              41.188936   
19472                                0.000000              43.216089   
19485                                0.000000              43.634498   
19497                                0.000000              40.421869   
19510                                0.000000              51.335652   

       lnfer_pre_pre_interaction     y_pre  
5                     650.681135  2.780291  
17                    908.454086 -1.935113  
26                    620.181768  2.981209  
33                    678.682039  1.695591  
40                    787.079375  2.054310  
...                          ...       ...  
19459                 310.337705  5.752193  
19472                 345.706199  5.038266  
19485                 344.256687  5.253525  
19497                 313.465895  5.321767  
19510                 409.038622  5.512467  

[1629 rows x 14 columns]
       geoid  year        pre  irrigation1     lnfer   lnyield  group  \
5       1001  2020  10.057620     0.235228  6.432470  4.192275      1   
26      1005  2020   9.962674     0.215025  6.248376  4.104936      1   
33      1009  2020  10.261350     0.884062  6.445511  4.200965      1   
40      1013  2020  10.629650     0.001124  6.965954  4.455058      1   
49      1015  2020  10.575430     0.701355  6.420520  4.195369      1   
...      ...   ...        ...          ...       ...       ...    ...   
19459  55133  2020   7.534492     0.000000  5.466717  4.037972      2   
19472  55135  2020   7.999479     0.000000  5.402363  4.023465      2   
19485  55137  2020   7.889553     0.000000  5.530668  4.096843      2   
19497  55139  2020   7.754859     0.000000  5.212457  3.893867      2   
19510  55141  2020   7.967925     0.000000  6.442788  4.662855      2   

       pre_pre_interaction  irrigation12  lnfer_irrigation1_pre_interaction  \
5               101.155720      0.055332                          15.218168   
26               99.254873      0.046236                          13.385421   
33              105.295304      0.781565                          58.471520   
40              112.989459      0.000001                           0.083213   
49              111.839720      0.491899                          47.621850   
...                    ...           ...                                ...   
19459            56.768570      0.000000                           0.000000   
19472            63.991664      0.000000                           0.000000   
19485            62.245047      0.000000                           0.000000   
19497            60.137838      0.000000                           0.000000   
19510            63.487829      0.000000                           0.000000   

       lnfer_irrigation12_pre_pre_interaction  lnfer_pre_interaction  \
5                                   36.003688              64.695339   
26                                  28.674570              62.250533   
33                                 530.434069              66.139644   
40                                   0.000994              74.045653   
49                                 353.217584              67.899760   
...                                       ...                    ...   
19459                                0.000000              41.188936   
19472                                0.000000              43.216089   
19485                                0.000000              43.634498   
19497                                0.000000              40.421869   
19510                                0.000000              51.335652   

       lnfer_pre_pre_interaction     y_pre  
5                     650.681135  2.780291  
26                    620.181768  2.981209  
33                    678.682039  1.695591  
40                    787.079375  2.054310  
49                    718.069157  1.333126  
...                          ...       ...  
19459                 310.337705  5.752193  
19472                 345.706199  5.038266  
19485                 344.256687  5.253525  
19497                 313.465895  5.321767  
19510                 409.038622  5.512467  

[1568 rows x 14 columns]
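The printed ds_pre subset above (1568 of 1629 counties in 2020) shows that most maize counties have a positive precipitation response. A one-line summary, assuming year_2020_data and ds_pre as defined in the cell above:

share_positive = len(ds_pre) / len(year_2020_data)
print(f"{share_positive:.1%} of counties have y_pre > 0 in 2020")  # roughly 96% for the sample shown above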
In [7]:
# Load your data
excel_file = "sensitivity.xlsx"
sheet_name = "soybean"
data = pd.read_excel(excel_file, sheet_name=sheet_name)

# Create interaction terms
data['tmp_tmp_interaction'] = data['tmp'] * data['tmp']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_tmp_interaction'] = data['lnfer'] * data['irrigation1'] * data['tmp']
data['lnfer_irrigation12_tmp_tmp_interaction'] = data['lnfer']* data['irrigation1'] * data['irrigation1'] * data['tmp'] * data['tmp']

data['lnfer_tmp_interaction'] = data['lnfer'] * data['tmp']
data['lnfer_tmp_tmp_interaction'] = data['lnfer'] * data['tmp'] * data['tmp']

# Define predictor variables and response variable
predictor_variables1 = ['lnfer', 'tmp', 'irrigation1', 'irrigation12', 'tmp_tmp_interaction',
                       'lnfer_irrigation1_tmp_interaction', 'lnfer_irrigation12_tmp_tmp_interaction']

predictor_variables2 = ['lnfer', 'tmp', 'tmp_tmp_interaction', 'irrigation1',
                       'lnfer_tmp_interaction', 'lnfer_tmp_tmp_interaction']

response_variable = 'lnyield'

# Get unique group values
group_values = data['group'].unique()

# Initialize an empty list to store data for each group
group_dfs = []
for group_idx, group in enumerate(group_values):
    # Filter data for the current group
    group_data = data[data['group'] == group].copy()  # Make a copy to avoid warnings
    
    # Select predictor variables based on the group
    predictor_variables = predictor_variables1 if group == 1 else predictor_variables2
    
    # Perform regression analysis for the current group
    X = sm.add_constant(group_data[predictor_variables])
    y = group_data[response_variable]
    reg = sm.OLS(y, X).fit()
    
    group_data = data[data['group'] == group].copy()
    if group == 1:
        group_data['y_temp'] =  (reg.params['tmp_tmp_interaction'] * group_data['tmp']*2 +
                          reg.params['tmp']  +
                          reg.params['lnfer_irrigation1_tmp_interaction'] * group_data['lnfer'] * group_data['irrigation1'] +
                          reg.params['lnfer_irrigation12_tmp_tmp_interaction'] * group_data['irrigation12'] * group_data['lnfer'] * group_data['tmp']*2)*100
    else:
        group_data['y_temp'] = (reg.params['tmp_tmp_interaction'] * group_data['tmp']*2 +
            reg.params['tmp'] + reg.params['lnfer_tmp_interaction'] * group_data['lnfer'] +
            reg.params['lnfer_tmp_tmp_interaction'] * group_data['lnfer'] * group_data['tmp']*2
        )*100
    group_dfs.append(group_data)

# Concatenate the dataframes for all groups
data = pd.concat(group_dfs)

# Filter data for the year 2020 (copy to avoid SettingWithCopyWarning)
year_2020_data = data[data['year'] == 2020].copy()
# Convert the 'geoid' column to string so it can be matched against shapefile GEOIDs
year_2020_data['geoid'] = year_2020_data['geoid'].astype(str)



# Load US county boundaries shapefile using geopandas
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_county_20m/surface_shapefile.shp'
gdf_county = gpd.read_file(shapefile_path)

# Ensure 'GEOID' column in the shapefile has the 'object' data type
gdf_county['GEOID'] = gdf_county['GEOID'].astype(str)

# Rename the 'geoid' column to 'GEOID' in year_2020_data
year_2020_data.rename(columns={'geoid': 'GEOID'}, inplace=True)

# Now, merge 'year_2020_data' with county boundaries based on 'GEOID' attribute
merged_data = gdf_county.merge(year_2020_data, on='GEOID', how='left')

# Filter the data to include only geometries within the specified longitude and latitude range
filtered_data = merged_data.cx[-130:-60, 25:50]

# Calculate the minimum and maximum of 'y_temp' (informational only; the colorbar below uses a fixed ±11 range)
min_temp = filtered_data['y_temp'].min()
max_temp = filtered_data['y_temp'].max()


# Prepend white to the PuOr colormap (the lowest bin renders white) and normalize to the ±11% range
cmap = mcolors.ListedColormap(['white'] + list(plt.cm.PuOr(np.linspace(0, 1, 255))))
norm = mcolors.Normalize(vmin=-11, vmax=11)
# Clip 'y_temp' to the colorbar range
filtered_data.loc[filtered_data['y_temp'] > 11, 'y_temp'] = 11
filtered_data.loc[filtered_data['y_temp'] < -11, 'y_temp'] = -11

# Plot both county boundaries and 'y_temp' values on the same map within the specified range
fig, ax = plt.subplots(figsize=(10, 4))
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_state_20m/cb_2018_us_state_20m.shp'
gdf_state = gpd.read_file(shapefile_path)
gdf_state.rename(columns={'GEOID': 'GEOID_state'}, inplace=True)
gdf_state = gdf_state.cx[-130:-60, 25:50]
gdf_state.boundary.plot(ax=ax, linewidth=0.3, color='grey')  # Plot state boundaries in grey
sc = plt.scatter([], [], c=[], cmap=cmap, norm=norm)  # Create an empty ScalarMappable for the colorbar

# Define custom colorbar ticks
custom_ticks = [ -10, -5, 0, 5, 10]

# Adjust the height of the colorbar by setting the shrink parameter
cbar = plt.colorbar(sc, ax=ax, label='Yield change (%)', orientation='vertical', pad=-0.02, shrink=0.8, ticks=custom_ticks)

# Set the colorbar label text, font size, and tick label size
cbar.ax.set_ylabel('Yield change (%)', fontsize=16, fontfamily='Arial')
cbar.ax.tick_params(labelsize=16)  # Adjust tick label fontsize

filtered_data.plot(column='y_temp', cmap=cmap, ax=ax, norm=norm)

# Add labels, titles, or other plot configurations as needed
ax.set_xticks([])  # Remove x-axis ticks
ax.set_yticks([])  # Remove y-axis ticks
ax.set_frame_on(False)  # Turn off the frame
plt.text(0.27, 0.22, '(c) Soybean', transform=plt.gcf().transFigure,
         fontsize=18, fontweight='bold', fontfamily='Arial', ha='left', va='bottom')

# Save the entire figure as a JPG with DPI 300
plt.savefig('soybean_tmp.jpg', format='jpg', dpi=300, bbox_inches='tight', pad_inches=0.1)

# Show the plot
plt.show()
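Each mapping cell builds its colorbar from an empty scatter call. An equivalent option is to hand a ScalarMappable to fig.colorbar directly; this is only a stylistic alternative, sketched here with the norm, cmap, custom_ticks, fig, and ax objects defined in the cell above:

from matplotlib.cm import ScalarMappable

mappable = ScalarMappable(norm=norm, cmap=cmap)
mappable.set_array([])  # the colorbar only needs the norm and colormap, not data
cbar = fig.colorbar(mappable, ax=ax, orientation='vertical', pad=-0.02, shrink=0.8, ticks=custom_ticks)
cbar.ax.set_ylabel('Yield change (%)', fontsize=16, fontfamily='Arial')
cbar.ax.tick_params(labelsize=16)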
In [8]:
# Load your data
excel_file = "sensitivity.xlsx"
sheet_name = "soybean_pre"
data = pd.read_excel(excel_file, sheet_name=sheet_name)

# Create interaction terms
data['pre_pre_interaction'] = data['pre'] * data['pre']
data['irrigation12'] = data['irrigation1'] * data['irrigation1']
data['lnfer_irrigation1_pre_interaction'] = data['lnfer'] * data['irrigation1'] * data['pre']
data['lnfer_irrigation12_pre_pre_interaction'] = data['lnfer']* data['irrigation1'] * data['irrigation1'] * data['pre'] * data['pre']

data['lnfer_pre_interaction'] = data['lnfer'] * data['pre']
data['lnfer_pre_pre_interaction'] = data['lnfer'] * data['pre'] * data['pre']

# Define predictor variables and response variable
predictor_variables1 = ['lnfer', 'pre', 'irrigation1', 'irrigation12', 'pre_pre_interaction',
                       'lnfer_irrigation1_pre_interaction', 'lnfer_irrigation12_pre_pre_interaction']

predictor_variables2 = ['lnfer', 'pre', 'pre_pre_interaction', 'irrigation1',
                       'lnfer_pre_interaction', 'lnfer_pre_pre_interaction']

response_variable = 'lnyield'

# Get unique group values
group_values = data['group'].unique()

# Initialize an empty list to store data for each group
group_dfs = []
for group_idx, group in enumerate(group_values):
    # Filter data for the current group
    group_data = data[data['group'] == group].copy()  # Make a copy to avoid warnings
    
    # Select predictor variables based on the group
    predictor_variables = predictor_variables1 if group == 1 else predictor_variables2
    
    # Perform regression analysis for the current group
    X = sm.add_constant(group_data[predictor_variables])
    y = group_data[response_variable]
    reg = sm.OLS(y, X).fit()
    
    group_data = data[data['group'] == group].copy()
    if group == 1:
        group_data['y_pre'] =  (reg.params['pre_pre_interaction'] * group_data['pre']*2 +
                          reg.params['pre']  +
                          reg.params['lnfer_irrigation1_pre_interaction'] * group_data['lnfer'] * group_data['irrigation1'] +
                          reg.params['lnfer_irrigation12_pre_pre_interaction'] * group_data['irrigation12'] * group_data['lnfer'] * group_data['pre']*2)*100
    else:
        group_data['y_pre'] = (reg.params['pre_pre_interaction'] * group_data['pre']*2 +
            reg.params['pre'] + reg.params['lnfer_pre_interaction'] * group_data['lnfer'] +
            reg.params['lnfer_pre_pre_interaction'] * group_data['lnfer'] * group_data['pre']*2
        )*100
    group_dfs.append(group_data)

# Concatenate the dataframes for all groups
data = pd.concat(group_dfs)

# Filter data for the year 2020 (copy to avoid SettingWithCopyWarning)
year_2020_data = data[data['year'] == 2020].copy()
# Convert the 'geoid' column to string so it can be matched against shapefile GEOIDs
year_2020_data['geoid'] = year_2020_data['geoid'].astype(str)


# Load US county boundaries shapefile using geopandas
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_county_20m/surface_shapefile.shp'
gdf_county = gpd.read_file(shapefile_path)

# Ensure 'GEOID' column in the shapefile has the 'object' data type
gdf_county['GEOID'] = gdf_county['GEOID'].astype(str)

# Rename the 'geoid' column to 'GEOID' in year_2020_data
year_2020_data.rename(columns={'geoid': 'GEOID'}, inplace=True)

# Now, merge 'year_2020_data' with county boundaries based on 'GEOID' attribute
merged_data = gdf_county.merge(year_2020_data, on='GEOID', how='left')

# Filter the data to include only geometries within the specified longitude and latitude range
filtered_data = merged_data.cx[-130:-60, 25:50]

# Prepend white to the PuOr colormap (the lowest bin renders white) and normalize to the ±11% range
cmap = mcolors.ListedColormap(['white'] + list(plt.cm.PuOr(np.linspace(0, 1, 255))))
norm = mcolors.Normalize(vmin=-11, vmax=11)
# Clip 'y_pre' to the colorbar range
filtered_data.loc[filtered_data['y_pre'] > 11, 'y_pre'] = 11
filtered_data.loc[filtered_data['y_pre'] < -11, 'y_pre'] = -11

# Plot state boundaries and county-level 'y_pre' values on the same map within the specified range
fig, ax = plt.subplots(figsize=(10, 4))
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_state_20m/cb_2018_us_state_20m.shp'
gdf_state = gpd.read_file(shapefile_path)
gdf_state.rename(columns={'GEOID': 'GEOID_state'}, inplace=True)
gdf_state = gdf_state.cx[-130:-60, 25:50]
gdf_state.boundary.plot(ax=ax, linewidth=0.3, color='grey')  # Plot state boundaries in grey
sc = plt.scatter([], [], c=[], cmap=cmap, norm=norm)  # Create an empty ScalarMappable for the colorbar

# Define custom colorbar ticks
custom_ticks = [ -10, -5, 0, 5, 10]

# Adjust the height of the colorbar by setting the shrink parameter
cbar = plt.colorbar(sc, ax=ax, label='Yield change (%)', orientation='vertical', pad=-0.02, shrink=0.8,  ticks=custom_ticks)

# Set the colorbar label text, font size, and tick label size
cbar.ax.set_ylabel('Yield change (%)', fontsize=16, fontfamily='Arial')
cbar.ax.tick_params(labelsize=16)  # Adjust tick label fontsize

filtered_data.plot(column='y_pre', cmap=cmap, ax=ax, norm=norm)

# Add labels, titles, or other plot configurations as needed
ax.set_xticks([])  # Remove x-axis ticks
ax.set_yticks([])  # Remove y-axis ticks
ax.set_frame_on(False)  # Turn off the frame
plt.text(0.27, 0.22, '(d) Soybean', transform=plt.gcf().transFigure,
         fontsize=18, fontweight='bold', fontfamily='Arial', ha='left', va='bottom')

# Save the entire figure as a JPG with DPI 300
plt.savefig('soybean_pre.jpg', format='jpg', dpi=300, bbox_inches='tight', pad_inches=0.1)

# Show the plot
plt.show()
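The four panels are saved as separate JPGs, labelled (a) through (d). If a single composite figure is wanted, a minimal assembly sketch using the GridSpec import from the first cell is shown below; the 2x2 layout and the output filename are assumptions, not taken from the notebook (reading JPGs with plt.imread requires Pillow):

fig = plt.figure(figsize=(20, 8))
gs = GridSpec(2, 2, figure=fig, wspace=0.02, hspace=0.02)
for idx, fname in enumerate(['maize_tmp.jpg', 'maize_pre.jpg', 'soybean_tmp.jpg', 'soybean_pre.jpg']):
    ax = fig.add_subplot(gs[idx])
    ax.imshow(plt.imread(fname))  # each saved panel already carries its own label and colorbar
    ax.axis('off')
fig.savefig('figure2_sensitivity_panels.jpg', dpi=300, bbox_inches='tight')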