import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

import netCDF4 as nc
import pandas as pd
import os
import csv
from glob import glob
import xarray as xr
import matplotlib.ticker as ticker 

import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec

import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from matplotlib.colors import LinearSegmentedColormap, Normalize
from mpl_toolkits.axes_grid1 import make_axes_locatable

import geopandas as gpd
import matplotlib.colors as mcolors


# Step 1: Data Preparation
# Load your dataset into a pandas DataFrame
os.chdir('/Users/chenchenren/postdoc/paper/2N and water-US/Figure 4 Pattern/')


# Load data from an Excel file
excel_file = "pattern_base.xlsx"
sheet_name = "soybean"
data = pd.read_excel(excel_file, sheet_name=sheet_name)

# Calculate the mean of 'gap_production' within each unique combination of 'scenario' and 'geoid'
print(np.min(data['yield']),np.max(data['yield']),np.mean(data['yield']))
print(np.percentile(data['yield'], 5),np.percentile(data['yield'], 95))

p95 = np.percentile(data['yield'], 95)
# Replace values in 'gap_yield' greater than p90 with p90
data.loc[data['yield'] > 4.1, 'yield'] = 4.1
data.loc[data['yield'] < 0.9, 'yield'] = 0.9

data['geoid'] = data['geoid'].astype(float)

# Load US county boundaries shapefile using geopandas
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_county_20m/surface_shapefile.shp'
gdf_county = gpd.read_file(shapefile_path)

# Ensure 'GEOID' column in the shapefile has the 'object' data type
gdf_county['GEOID'] = gdf_county['GEOID'].astype(float)

# Rename the 'geoid' column to 'GEOID' in year_2020_data
data.rename(columns={'geoid': 'GEOID'}, inplace=True)

# Convert the 'GEOID' column in the 'data' DataFrame to 'object' data type
data['GEOID'] = data['GEOID'].astype(float)
# Now, merge 'data' with county boundaries based on 'GEOID' attribute
merged_data = gdf_county.merge(data, on='GEOID', how='left')

# Filter the data to include only geometries within the specified longitude and latitude range
filtered_data = merged_data.cx[-130:-60, 25:50]

cmap = mcolors.ListedColormap(['white'] + list(plt.cm.YlOrBr(np.linspace(0, 1, 255))))
norm = mcolors.Normalize(vmin=0.8, vmax=4.2)

# Plot both county boundaries and 'y_temp' values on the same map within the specified range
fig, ax = plt.subplots(figsize=(10, 4))
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_state_20m/cb_2018_us_state_20m.shp'
gdf_state = gpd.read_file(shapefile_path)
gdf_state.rename(columns={'GEOID': 'GEOID_state'}, inplace=True)
gdf_state = gdf_state.cx[-130:-60, 25:50]
gdf_state.boundary.plot(ax=ax, linewidth=0.3, color='grey')  # Plot county boundaries in grey

sc = plt.scatter([], [], c=[], cmap=cmap, norm=norm)  # Create an empty ScalarMappable for the colorbar

# Define custom colorbar ticks
custom_ticks = [1, 2, 3, 4]

# Adjust the height of the colorbar by setting the shrink parameter
cbar = plt.colorbar(sc, ax=ax, label='Yield (t/ha)', orientation='vertical', pad=-0.02, shrink=0.8, ticks=custom_ticks)

# Set the font size of the colorbar label
# Set the colorbar label text, font size, and padding
cbar.ax.set_ylabel('Yield (tonnes/ha/yr)', fontsize=16, fontfamily='Arial')
cbar.ax.tick_params(labelsize=16)  # Adjust tick label fontsize

filtered_data.plot(column='yield', cmap=cmap, ax=ax, norm=norm)

# Add labels, titles, or other plot configurations as needed
ax.set_xticks([])  # Remove x-axis ticks
ax.set_yticks([])  # Remove y-axis ticks
ax.set_frame_on(False)  # Turn off the frame
plt.text(0.25, 0.2, '(d) Soybean', transform=plt.gcf().transFigure,
         fontsize=18, fontweight='bold', fontfamily='Arial', ha='left', va='bottom')

# Save the entire figure as a JPG with DPI 300
plt.savefig('soybean_yield.jpg', format='jpg', dpi=300, bbox_inches='tight', pad_inches=0.1)

# Show the plot
plt.show()

0.5858859 4.425745 2.799098633897349
1.7720524000000002 3.7696726


# Load data from an Excel file
excel_file = "pattern_base.xlsx"
sheet_name = "soybean"
data = pd.read_excel(excel_file, sheet_name=sheet_name)

# Calculate the mean of 'gap_production' within each unique combination of 'scenario' and 'geoid'
print(np.min(data['production']),np.max(data['production']),np.mean(data['production']))
print(np.percentile(data['production'], 5),np.percentile(data['production'], 95))
print(np.sum(data['production']))

# Replace values in 'gap_yield' greater than p90 with p90
data.loc[data['production'] > 19, 'production'] = 19
data.loc[data['production'] < 0.3, 'production'] = 0.3

data['geoid'] = data['geoid'].astype(float)

# Load US county boundaries shapefile using geopandas
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_county_20m/surface_shapefile.shp'
gdf_county = gpd.read_file(shapefile_path)

# Ensure 'GEOID' column in the shapefile has the 'object' data type
gdf_county['GEOID'] = gdf_county['GEOID'].astype(float)

# Rename the 'geoid' column to 'GEOID' in year_2020_data
data.rename(columns={'geoid': 'GEOID'}, inplace=True)

# Convert the 'GEOID' column in the 'data' DataFrame to 'object' data type
data['GEOID'] = data['GEOID'].astype(float)
# Now, merge 'data' with county boundaries based on 'GEOID' attribute
merged_data = gdf_county.merge(data, on='GEOID', how='left')

# Filter the data to include only geometries within the specified longitude and latitude range
filtered_data = merged_data.cx[-130:-60, 25:50]

cmap = mcolors.ListedColormap(['white'] + list(plt.cm.RdPu(np.linspace(0, 1, 255))))
norm = mcolors.Normalize(vmin=0, vmax=20)

# Plot both county boundaries and 'y_temp' values on the same map within the specified range
fig, ax = plt.subplots(figsize=(10, 4))
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_state_20m/cb_2018_us_state_20m.shp'
gdf_state = gpd.read_file(shapefile_path)
gdf_state.rename(columns={'GEOID': 'GEOID_state'}, inplace=True)
gdf_state = gdf_state.cx[-130:-60, 25:50]
gdf_state.boundary.plot(ax=ax, linewidth=0.3, color='grey')  # Plot county boundaries in grey

sc = plt.scatter([], [], c=[], cmap=cmap, norm=norm)  # Create an empty ScalarMappable for the colorbar

# Define custom colorbar ticks
custom_ticks = [0, 6, 12, 18]

# Adjust the height of the colorbar by setting the shrink parameter
cbar = plt.colorbar(sc, ax=ax, label='Production ($10^{4}$ t)', orientation='vertical', pad=-0.02, shrink=0.8, ticks=custom_ticks)

# Set the font size of the colorbar label
# Set the colorbar label text, font size, and padding
cbar.ax.set_ylabel('Production ($10^{4}$ tonnes/yr)', fontsize=16, fontfamily='Arial')
cbar.ax.tick_params(labelsize=16)  # Adjust tick label fontsize

filtered_data.plot(column='production', cmap=cmap, ax=ax, norm=norm)

# Add labels, titles, or other plot configurations as needed
ax.set_xticks([])  # Remove x-axis ticks
ax.set_yticks([])  # Remove y-axis ticks
ax.set_frame_on(False)  # Turn off the frame
plt.text(0.25, 0.2, '(d) Soybean', transform=plt.gcf().transFigure,
         fontsize=18, fontweight='bold', fontfamily='Arial', ha='left', va='bottom')

# Save the entire figure as a JPG with DPI 300
plt.savefig('soybean_production.jpg', format='jpg', dpi=300, bbox_inches='tight', pad_inches=0.1)

# Show the plot
plt.show()

0.0185206 47.31714 5.572154536717427
0.10839657999999999 18.306892
9879.429993599999


# Load data from an Excel file
excel_file = "pattern_base.xlsx"
sheet_name = "soybean"
data = pd.read_excel(excel_file, sheet_name=sheet_name)

# Calculate the mean of 'gap_production' within each unique combination of 'scenario' and 'geoid'
print(np.min(data['fer']),np.max(data['fer']),np.mean(data['fer']))
print(np.percentile(data['fer'], 5),np.percentile(data['fer'], 95))

# Replace values in 'gap_yield' greater than p90 with p90
data.loc[data['fer'] > 0.43, 'fer'] = 0.43
data.loc[data['fer'] < 0.002, 'fer'] = 0.002

data['geoid'] = data['geoid'].astype(float)

# Load US county boundaries shapefile using geopandas
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_county_20m/surface_shapefile.shp'
gdf_county = gpd.read_file(shapefile_path)

# Ensure 'GEOID' column in the shapefile has the 'object' data type
gdf_county['GEOID'] = gdf_county['GEOID'].astype(float)

# Rename the 'geoid' column to 'GEOID' in year_2020_data
data.rename(columns={'geoid': 'GEOID'}, inplace=True)

# Convert the 'GEOID' column in the 'data' DataFrame to 'object' data type
data['GEOID'] = data['GEOID'].astype(float)
# Now, merge 'data' with county boundaries based on 'GEOID' attribute
merged_data = gdf_county.merge(data, on='GEOID', how='left')

# Filter the data to include only geometries within the specified longitude and latitude range
filtered_data = merged_data.cx[-130:-60, 25:50]

cmap = mcolors.ListedColormap(['white'] + list(plt.cm.Purples(np.linspace(0, 1, 255))))
norm = mcolors.Normalize(vmin=0.0, vmax=0.046)

# Plot both county boundaries and 'y_temp' values on the same map within the specified range
fig, ax = plt.subplots(figsize=(14, 4))
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_state_20m/cb_2018_us_state_20m.shp'
gdf_state = gpd.read_file(shapefile_path)
gdf_state.rename(columns={'GEOID': 'GEOID_state'}, inplace=True)
gdf_state = gdf_state.cx[-130:-60, 25:50]
gdf_state.boundary.plot(ax=ax, linewidth=0.3, color='grey')  # Plot county boundaries in grey

sc = plt.scatter([], [], c=[], cmap=cmap, norm=norm)  # Create an empty ScalarMappable for the colorbar

# Define custom colorbar ticks
custom_ticks = [0, 0.01, 0.02, 0.03, 0.04]

# Adjust the height of the colorbar by setting the shrink parameter
cbar = plt.colorbar(sc, ax=ax, label='Fertilizer (t/ha)', orientation='vertical', pad=-0.02, shrink=0.8, ticks=custom_ticks)

# Set the font size of the colorbar label
# Set the colorbar label text, font size, and padding
cbar.ax.set_ylabel('N input (tonnes/ha/yr)', fontsize=16, fontfamily='Arial')
cbar.ax.tick_params(labelsize=16)  # Adjust tick label fontsize

filtered_data.plot(column='fer', cmap=cmap, ax=ax, norm=norm)

# Add labels, titles, or other plot configurations as needed
ax.set_xticks([])  # Remove x-axis ticks
ax.set_yticks([])  # Remove y-axis ticks
ax.set_frame_on(False)  # Turn off the frame
plt.suptitle('(c) Current soybean N input', fontsize=18, fontweight='bold', fontfamily='Arial', x=0.58, y=0.91)

# Save the entire figure as a JPG with DPI 300
plt.savefig('soybean_fer.jpg', format='jpg', dpi=300, bbox_inches='tight', pad_inches=0.1)

# Show the plot
plt.show()

0.000546 0.6435232 0.02116517084038353
0.00130542 0.0881056599999999


# Load data from an Excel file
excel_file = "pattern_base.xlsx"
sheet_name = "soybean"
data = pd.read_excel(excel_file, sheet_name=sheet_name)

data['fer_t']=data['fer']*data['har']/1000

# Calculate the mean of 'gap_production' within each unique combination of 'scenario' and 'geoid'
print(np.min(data['fer_t']),np.max(data['fer_t']),np.mean(data['fer_t']))
print(np.percentile(data['fer_t'], 5),np.percentile(data['fer_t'], 95))
print(np.sum(data['fer_t']))

# Replace values in 'gap_yield' greater than p90 with p90
data.loc[data['fer_t'] > 0.41, 'fer_t'] = 0.41

data['geoid'] = data['geoid'].astype(float)

# Load US county boundaries shapefile using geopandas
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_county_20m/surface_shapefile.shp'
gdf_county = gpd.read_file(shapefile_path)

# Ensure 'GEOID' column in the shapefile has the 'object' data type
gdf_county['GEOID'] = gdf_county['GEOID'].astype(float)

# Rename the 'geoid' column to 'GEOID' in year_2020_data
data.rename(columns={'geoid': 'GEOID'}, inplace=True)

# Convert the 'GEOID' column in the 'data' DataFrame to 'object' data type
data['GEOID'] = data['GEOID'].astype(float)
# Now, merge 'data' with county boundaries based on 'GEOID' attribute
merged_data = gdf_county.merge(data, on='GEOID', how='left')

# Filter the data to include only geometries within the specified longitude and latitude range
filtered_data = merged_data.cx[-130:-60, 25:50]

cmap = mcolors.ListedColormap(['white'] + list(plt.cm.Purples(np.linspace(0, 1, 255))))
norm = mcolors.Normalize(vmin=0.0, vmax=0.43)

# Plot both county boundaries and 'y_temp' values on the same map within the specified range
fig, ax = plt.subplots(figsize=(14, 4))
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_state_20m/cb_2018_us_state_20m.shp'
gdf_state = gpd.read_file(shapefile_path)
gdf_state.rename(columns={'GEOID': 'GEOID_state'}, inplace=True)
gdf_state = gdf_state.cx[-130:-60, 25:50]
gdf_state.boundary.plot(ax=ax, linewidth=0.3, color='grey')  # Plot county boundaries in grey

sc = plt.scatter([], [], c=[], cmap=cmap, norm=norm)  # Create an empty ScalarMappable for the colorbar

# Define custom colorbar ticks
custom_ticks = [0, 0.1, 0.2, 0.3, 0.4]

# Adjust the height of the colorbar by setting the shrink parameter
cbar = plt.colorbar(sc, ax=ax, label='Fertilizer (t/ha)', orientation='vertical', pad=-0.02, shrink=0.8, ticks=custom_ticks)

# Set the font size of the colorbar label
# Set the colorbar label text, font size, and padding
cbar.ax.set_ylabel('N input (thousand tonnes/yr)', fontsize=16, fontfamily='Arial')
cbar.ax.tick_params(labelsize=16)  # Adjust tick label fontsize

filtered_data.plot(column='fer_t', cmap=cmap, ax=ax, norm=norm)

# Add labels, titles, or other plot configurations as needed
ax.set_xticks([])  # Remove x-axis ticks
ax.set_yticks([])  # Remove y-axis ticks
ax.set_frame_on(False)  # Turn off the frame
plt.suptitle('(b) Current soybean N input', fontsize=18, fontweight='bold', fontfamily='Arial', x=0.58, y=0.91)

# Save the entire figure as a JPG with DPI 300
plt.savefig('soybean_fer_t.jpg', format='jpg', dpi=300, bbox_inches='tight', pad_inches=0.1)

# Show the plot
plt.show()

0.0002963142 1.98933964416 0.11293301114868087
0.005039589938760001 0.39249538291359987
200.2302287666112


# Load data from an Excel file
excel_file = "pattern_base.xlsx"
sheet_name = "soybean"
data = pd.read_excel(excel_file, sheet_name=sheet_name)

# Calculate the mean of 'gap_production' within each unique combination of 'scenario' and 'geoid'
print(np.min(data['irrigation1']),np.max(data['irrigation1']),np.mean(data['irrigation1']))
print(np.percentile(data['irrigation1'], 5),np.percentile(data['irrigation1'], 95))

# Replace values in 'gap_yield' greater than p90 with p90
data.loc[data['irrigation1'] > 0.43, 'irrigation1'] = 0.43
data.loc[data['irrigation1'] < 0, 'irrigation1'] = 0

data['geoid'] = data['geoid'].astype(float)

# Load US county boundaries shapefile using geopandas
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_county_20m/surface_shapefile.shp'
gdf_county = gpd.read_file(shapefile_path)

# Ensure 'GEOID' column in the shapefile has the 'object' data type
gdf_county['GEOID'] = gdf_county['GEOID'].astype(float)

# Rename the 'geoid' column to 'GEOID' in year_2020_data
data.rename(columns={'geoid': 'GEOID'}, inplace=True)

# Convert the 'GEOID' column in the 'data' DataFrame to 'object' data type
data['GEOID'] = data['GEOID'].astype(float)
# Now, merge 'data' with county boundaries based on 'GEOID' attribute
merged_data = gdf_county.merge(data, on='GEOID', how='left')

# Filter the data to include only geometries within the specified longitude and latitude range
filtered_data = merged_data.cx[-130:-60, 25:50]

cmap = mcolors.ListedColormap(['white'] + list(plt.cm.BuGn(np.linspace(0, 1, 255))))
norm = mcolors.Normalize(vmin=0, vmax=0.45)

# Plot both county boundaries and 'y_temp' values on the same map within the specified range
fig, ax = plt.subplots(figsize=(14, 4))
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_state_20m/cb_2018_us_state_20m.shp'
gdf_state = gpd.read_file(shapefile_path)
gdf_state.rename(columns={'GEOID': 'GEOID_state'}, inplace=True)
gdf_state = gdf_state.cx[-130:-60, 25:50]
gdf_state.boundary.plot(ax=ax, linewidth=0.3, color='grey')  # Plot county boundaries in grey

sc = plt.scatter([], [], c=[], cmap=cmap, norm=norm)  # Create an empty ScalarMappable for the colorbar

# Define custom colorbar ticks
custom_ticks = [0, 0.1, 0.2, 0.3, 0.4]

# Adjust the height of the colorbar by setting the shrink parameter
cbar = plt.colorbar(sc, ax=ax, label='Irrigation ($10^{3}$ m$^{3}$/ha/yr)', orientation='vertical', pad=-0.02, shrink=0.8, ticks=custom_ticks)

# Set the font size of the colorbar label
# Set the colorbar label text, font size, and padding
cbar.ax.set_ylabel('Irrigation ($10^{3}$ m$^{3}$/ha/yr)', fontsize=16, fontfamily='Arial')
cbar.ax.tick_params(labelsize=16)  # Adjust tick label fontsize

filtered_data.plot(column='irrigation1', cmap=cmap, ax=ax, norm=norm)

# Add labels, titles, or other plot configurations as needed
ax.set_xticks([])  # Remove x-axis ticks
ax.set_yticks([])  # Remove y-axis ticks
ax.set_frame_on(False)  # Turn off the frame
plt.suptitle('(d) Current soybean irrigation', fontsize=18, fontweight='bold', fontfamily='Arial', x=0.58, y=0.91)

# Save the entire figure as a JPG with DPI 300
plt.savefig('soybean_irrigation.jpg', format='jpg', dpi=300, bbox_inches='tight', pad_inches=0.1)

# Show the plot
plt.show()

0.0 4.094071 0.08704804256397738
0.0 0.5079830799999998


# Load data from an Excel file
excel_file = "pattern_base.xlsx"
sheet_name = "soybean"
data = pd.read_excel(excel_file, sheet_name=sheet_name)

data['irri_t']=data['irrigation1']*data['har']/1000

# Calculate the mean of 'gap_production' within each unique combination of 'scenario' and 'geoid'
print(np.min(data['irri_t']),np.max(data['irri_t']),np.mean(data['irri_t']))
print(np.percentile(data['irri_t'], 5),np.percentile(data['irri_t'], 95))
print(np.sum(data['irri_t']))

# Replace values in 'gap_yield' greater than p90 with p90
data.loc[data['irri_t'] > 3.2, 'irri_t'] = 3.2
data.loc[data['irri_t'] < 0, 'irri_t'] = 0

data['geoid'] = data['geoid'].astype(float)

# Load US county boundaries shapefile using geopandas
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_county_20m/surface_shapefile.shp'
gdf_county = gpd.read_file(shapefile_path)

# Ensure 'GEOID' column in the shapefile has the 'object' data type
gdf_county['GEOID'] = gdf_county['GEOID'].astype(float)

# Rename the 'geoid' column to 'GEOID' in year_2020_data
data.rename(columns={'geoid': 'GEOID'}, inplace=True)

# Convert the 'GEOID' column in the 'data' DataFrame to 'object' data type
data['GEOID'] = data['GEOID'].astype(float)
# Now, merge 'data' with county boundaries based on 'GEOID' attribute
merged_data = gdf_county.merge(data, on='GEOID', how='left')

# Filter the data to include only geometries within the specified longitude and latitude range
filtered_data = merged_data.cx[-130:-60, 25:50]

cmap = mcolors.ListedColormap(['white'] + list(plt.cm.BuGn(np.linspace(0, 1, 255))))
norm = mcolors.Normalize(vmin=0, vmax=3.3)

# Plot both county boundaries and 'y_temp' values on the same map within the specified range
fig, ax = plt.subplots(figsize=(14, 4))
shapefile_path = '/Users/chenchenren/postdoc/paper/2N and water-US/Data/cb_2018_us_state_20m/cb_2018_us_state_20m.shp'
gdf_state = gpd.read_file(shapefile_path)
gdf_state.rename(columns={'GEOID': 'GEOID_state'}, inplace=True)
gdf_state = gdf_state.cx[-130:-60, 25:50]
gdf_state.boundary.plot(ax=ax, linewidth=0.3, color='grey')  # Plot county boundaries in grey

sc = plt.scatter([], [], c=[], cmap=cmap, norm=norm)  # Create an empty ScalarMappable for the colorbar

# Define custom colorbar ticks
custom_ticks = [0, 0.8, 1.6, 2.4, 3.2]

# Adjust the height of the colorbar by setting the shrink parameter
cbar = plt.colorbar(sc, ax=ax, label='Irrigation ($10^{6}$ m$^{3}$/yr)', orientation='vertical', pad=-0.02, shrink=0.8, ticks=custom_ticks)

# Set the font size of the colorbar label
# Set the colorbar label text, font size, and padding
cbar.ax.set_ylabel('Irrigation ($10^{6}$ m$^{3}$/yr)', fontsize=16, fontfamily='Arial')
cbar.ax.tick_params(labelsize=16)  # Adjust tick label fontsize

filtered_data.plot(column='irri_t', cmap=cmap, ax=ax, norm=norm)

# Add labels, titles, or other plot configurations as needed
ax.set_xticks([])  # Remove x-axis ticks
ax.set_yticks([])  # Remove y-axis ticks
ax.set_frame_on(False)  # Turn off the frame
plt.suptitle('(b) Current soybean irrigation', fontsize=18, fontweight='bold', fontfamily='Arial', x=0.58, y=0.91)

# Save the entire figure as a JPG with DPI 300
plt.savefig('soybean_irrigation_t.jpg', format='jpg', dpi=300, bbox_inches='tight', pad_inches=0.1)

# Show the plot
plt.show()

0.0 115.90455455079999 1.2082800430347245
0.0 2.8724083922205463
2142.2805163005664