import pandas as pd
import numpy as np
import statsmodels.api as sm
import netCDF4 as nc
import pandas as pd
import os
import csv
from glob import glob
import xarray as xr
import matplotlib.ticker as ticker
import tifffile
import statsmodels.api as sm
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from matplotlib.colors import LinearSegmentedColormap, Normalize
from mpl_toolkits.axes_grid1 import make_axes_locatable
import geopandas as gpd
import matplotlib.colors as mcolors
import matplotlib.lines as mlines
from geotiff import GeoTiff
from rasterio import open as rasopen
import matplotlib.cm as cm
from PIL import Image
from matplotlib.font_manager import FontProperties
os.chdir('/Users/chenchenren/postdoc/paper/Other/Soil health/NF revision/bulk_density')
# Read the .dta file
maize_price = pd.read_stata('maize_price.dta')
maize_fer_price = pd.read_stata('maize_fer_price.dta')
soybean_price = pd.read_stata('soybean_price.dta')
soybean_fer_price = pd.read_stata('soybean_fer_price.dta')
soybean_fer_price = pd.read_stata('soybean_fer_price.dta')
irri_cost = pd.read_stata('irri_cost.dta')
usps = pd.read_stata('usps.dta')
region_state = pd.read_stata('region_state.dta')
region_state.rename(columns={'USPS': 'usps'}, inplace=True)
excel_file = "/Users/chenchenren/postdoc/paper/2N and water-US/Figure 3 Yiled under warming scenario/off_maize_mana_lnfer_irri.csv"
off_maize_mana = pd.read_csv(excel_file)
excel_file = "/Users/chenchenren/postdoc/paper/2N and water-US/Figure 3 Yiled under warming scenario/off_soybean_mana_lnfer_irri.csv"
off_soybean_mana = pd.read_csv(excel_file)
excel_file = "/Users/chenchenren/postdoc/paper/2N and water-US/Figure 3 Yiled under warming scenario/maize_yield_gain.csv"
maize_yield_gain = pd.read_csv(excel_file)
excel_file = "/Users/chenchenren/postdoc/paper/2N and water-US/Figure 3 Yiled under warming scenario/soybean_yield_gain.csv"
soybean_yield_gain = pd.read_csv(excel_file)
# Create new column yield_change_warming (kg/ha)
maize_yield_gain['yield_change_warming'] = maize_yield_gain['gap_yield_warming'] * 1000
# Generate gap_fer_mana column
off_maize_mana['gap_fer_mana'] = off_maize_mana['gap_fer'] * off_maize_mana['har']
# Generate gap_fer1 column (replace gap_fer with this value)
off_maize_mana['gap_fer1'] = (np.exp(off_maize_mana['lnfer_need']) - np.exp(off_maize_mana['lnfer'])) * off_maize_mana['har']
off_maize_mana['gap_fer'] = off_maize_mana['gap_fer1']
print(off_maize_mana)
# Generate gap_irri_mana column
off_maize_mana['gap_irri_mana'] = off_maize_mana['gap_irrigation'] * off_maize_mana['har'] * 1000
# Generate gap_irri column
off_maize_mana['gap_irri'] = (off_maize_mana['irri_need'] - off_maize_mana['irrigation1']) * off_maize_mana['har'] * 1000
# Generate yield_warming column
off_maize_mana['yield_warming'] = np.exp(off_maize_mana['y_temp_base_t'] - off_maize_mana['y_temp_base2015'] + off_maize_mana['lnyield_obs'])
# Generate yield_change_warming column
off_maize_mana['yield_change_warming'] = off_maize_mana['yield_warming'] - np.exp(off_maize_mana['lnyield_obs'])
# Sort by yield_change_warming
off_maize_mana = off_maize_mana.sort_values(by='yield_change_warming')
# Generate production_offset column for negative yield_change_warming
off_maize_mana['production_offset'] = np.where(off_maize_mana['yield_change_warming'] < 0,
-off_maize_mana['yield_change_warming'] * off_maize_mana['har'], np.nan)
# Append maize_yield_gain data
merged_data = pd.concat([off_maize_mana, maize_yield_gain], ignore_index=True)
# Generate production_gain_irri column for positive yield_change_warming
merged_data['production_gain_irri'] = np.where(merged_data['yield_change_warming'] > 0,
merged_data['yield_change_warming'] * merged_data['har'], np.nan)
# Replace production_gain_irri for negative yield_change_warming
merged_data['production_gain_irri'] = np.where(merged_data['yield_change_warming'] < 0,
(np.exp(merged_data['y_temp_irri'] - merged_data['y_temp_base_t'] + merged_data['lnyield_obs']) -
np.exp(merged_data['lnyield_obs'])) * merged_data['har'],
merged_data['production_gain_irri'])
# Generate production_gain_lnfer column for positive yield_change_warming
merged_data['production_gain_lnfer'] = np.where(merged_data['yield_change_warming'] > 0,
merged_data['yield_change_warming'] * merged_data['har'], np.nan)
# Replace production_gain_lnfer for negative yield_change_warming
merged_data['production_gain_lnfer'] = np.where(merged_data['yield_change_warming'] < 0,
merged_data['production_offset'], merged_data['production_gain_lnfer'])
# Generate production_gain_mana column for positive yield_change_warming
merged_data['production_gain_mana'] = np.where(merged_data['yield_change_warming'] > 0,
merged_data['yield_change_warming'] * merged_data['har'], np.nan)
# Replace production_gain_mana for negative yield_change_warming
merged_data['production_gain_mana'] = np.where(merged_data['yield_change_warming'] < 0,
(np.exp(merged_data['y_temp'] - merged_data['y_temp_base_t'] + merged_data['lnyield_obs']) -
np.exp(merged_data['lnyield_obs'])) * merged_data['har'],
merged_data['production_gain_mana'])
# Generate production_obs column
merged_data['production_obs'] = np.exp(merged_data['lnyield_obs']) * merged_data['har']
# Keep only the required columns
final_columns = ['scenario', 'geoid', 'production_gain_irri', 'production_gain_lnfer', 'production_gain_mana',
'gap_fer', 'gap_fer_mana', 'gap_irri', 'gap_irri_mana', 'production_obs']
final_data = merged_data[final_columns]
print(final_data)
tmp pre lnfer irrigation1 y_temp mana_group geoid \ 0 22.839014 6.322068 5.839142 0.597476 9.079736 1 20155 1 20.166084 1.344053 4.794494 11.580350 8.300022 1 6077 2 21.527692 1.345221 4.794494 11.580350 8.300022 1 6077 3 22.449214 8.420632 6.483547 0.042975 9.486223 1 20059 4 22.370492 0.586395 4.986941 11.580350 8.342841 1 6031 ... ... ... ... ... ... ... ... 2805 23.562460 8.808290 6.935410 0.003775 9.746407 3 5047 2806 21.724514 9.306550 6.463277 0.000000 9.678510 3 5071 2807 23.427664 8.846645 6.463277 0.000000 9.678510 3 5071 2808 22.086208 9.038597 7.055665 0.180312 9.788951 3 5115 2809 20.786906 8.489663 6.409451 0.000000 9.682911 3 21201 scenario lnyield_obs scenario.1 ... gap_irrigation lnfer_need \ 0 3.0 9.056389 3.0 ... -0.597376 6.045595 1 1.5 9.293982 1.5 ... -11.580250 4.794858 2 3.0 9.293982 3.0 ... -11.580250 4.803031 3 3.0 8.794492 3.0 ... -0.042875 6.628029 4 3.0 9.439953 3.0 ... -11.580250 5.007652 ... ... ... ... ... ... ... 2805 3.0 9.282893 3.0 ... 0.000000 7.211977 2806 1.5 8.991028 1.5 ... 0.000000 6.604987 2807 3.0 8.991028 3.0 ... 0.000000 6.911222 2808 1.5 9.097284 1.5 ... 0.000000 7.191783 2809 3.0 8.700696 3.0 ... 0.000000 6.548477 lnfer_values irri_need y_temp_base2015 y_temp_base_t irri_need_1 \ 0 6.128697 2.325000 9.079736 8.985312 2.325000 1 5.852775 2.695238 8.300022 8.299829 11.580350 2 5.914959 2.695238 8.300022 8.295453 11.580350 3 6.631116 2.183000 9.486223 9.420085 2.183000 4 6.374047 2.695238 8.342841 8.331340 11.580350 ... ... ... ... ... ... 2805 7.211977 2.695238 9.746407 9.619741 2.695238 2806 6.604987 2.695238 9.678510 9.587516 2.695238 2807 6.911222 NaN 9.678510 9.404765 2.695238 2808 7.191783 2.695238 9.788951 9.726792 2.695238 2809 6.548477 NaN 9.682911 9.592642 2.695238 y_temp_irri gap_fer_mana gap_fer1 0 9.079730 1.795945e+06 1.226286e+06 1 8.299829 3.353909e+06 6.494462e+02 2 8.295453 3.683461e+06 1.528393e+04 3 9.486212 1.297454e+06 1.268305e+06 4 8.331340 1.021969e+06 7.121358e+03 ... ... ... ... 2805 9.694141 1.061192e+05 1.061192e+05 2806 9.523139 5.612657e+04 5.612657e+04 2807 9.486994 2.083303e+05 2.083303e+05 2808 9.775172 7.189327e+04 7.189327e+04 2809 9.492386 2.086460e+04 2.086460e+04 [2810 rows x 22 columns] scenario geoid production_gain_irri production_gain_lnfer \ 0 3.0 48393 4.821182e+05 3.064302e+06 1 3.0 48195 3.115623e+07 6.909803e+07 2 3.0 48233 2.948702e+06 2.097004e+07 3 3.0 48163 3.575089e+07 1.035329e+07 4 3.0 48129 2.787368e+06 2.199344e+06 ... ... ... ... ... 4185 3.0 56029 8.334977e+05 8.334977e+05 4186 1.5 56031 7.228510e+05 7.228510e+05 4187 3.0 56031 1.013097e+06 1.013097e+06 4188 1.5 56043 5.754454e+04 5.754454e+04 4189 3.0 56043 5.987683e+05 5.987683e+05 production_gain_mana gap_fer gap_fer_mana gap_irri \ 0 4.037643e+06 4.773639e+05 4.773639e+05 NaN 1 9.077167e+07 6.110639e+06 6.110639e+06 6.168161e+07 2 2.795493e+07 3.311343e+06 3.311343e+06 NaN 3 1.573760e+07 5.071501e+06 5.071501e+06 9.433580e+06 4 2.787723e+06 7.947360e+04 7.947360e+04 1.330466e+06 ... ... ... ... ... 4185 8.334977e+05 NaN NaN NaN 4186 7.228510e+05 NaN NaN NaN 4187 1.013097e+06 NaN NaN NaN 4188 5.754454e+04 NaN NaN NaN 4189 5.987683e+05 NaN NaN NaN gap_irri_mana production_obs 0 0.0 1.271143e+07 1 0.0 2.893905e+08 2 0.0 8.392635e+07 3 0.0 3.026130e+07 4 0.0 1.042043e+07 ... ... ... 4185 NaN 1.098166e+07 4186 NaN 3.535052e+07 4187 NaN 3.535052e+07 4188 NaN 8.916640e+06 4189 NaN 8.916640e+06 [4190 rows x 10 columns]
# Step 1: Merge m:1 geoid with USPS data
merged_data = pd.merge(merged_data, usps, on='geoid', how='inner') # Inner merge corresponds to "keep if _merge == 3"
# Step 2: Merge m:1 usps with region_state data
merged_data = pd.merge(merged_data, region_state, on='usps', how='inner') # Inner merge
# Step 3: Merge m:1 stateansi with maize_price
merged_data = pd.merge(merged_data, maize_price, on='stateansi', how='inner') # Inner merge
# Step 4: Merge m:1 stateansi with maize_fer_price
merged_data = pd.merge(merged_data, maize_fer_price, on='stateansi', how='inner') # Inner merge
# Step 5: Merge m:1 stateansi with irri_cost
merged_data = pd.merge(merged_data, irri_cost, on='stateansi', how='inner') # Inner merge
# Step 6: Replace missing values with 0 for specific columns
merged_data['gap_fer'].fillna(0, inplace=True)
merged_data['gap_fer_mana'].fillna(0, inplace=True)
merged_data['gap_irri'].fillna(0, inplace=True)
merged_data['gap_irri_mana'].fillna(0, inplace=True)
# Step 7: Generate columns for production benefits and costs
merged_data['production_benefit_irri'] = merged_data['production_gain_irri'] * merged_data['maize_price'] / 1000000
merged_data['production_benefit_lnfer'] = merged_data['production_gain_lnfer'] * merged_data['maize_price'] / 1000000
merged_data['production_benefit_mana'] = merged_data['production_gain_mana'] * merged_data['maize_price'] / 1000000
merged_data['irri_cost_irri'] = merged_data['gap_irri'] * merged_data['irri_price'] / 1000000
merged_data['irri_cost_mana'] = merged_data['gap_irri_mana'] * merged_data['irri_price'] / 1000000
merged_data['fer_cost_fer'] = merged_data['gap_fer'] / 1000 * merged_data['fer_price'] / 1000000
merged_data['fer_cost_mana'] = merged_data['gap_fer_mana'] / 1000 * merged_data['fer_price'] / 1000000
# Step 8: Generate columns for net benefits
merged_data['net_benefits_irri'] = merged_data['production_benefit_irri'] - merged_data['irri_cost_irri']
merged_data['net_benefits_fer'] = merged_data['production_benefit_lnfer'] - merged_data['fer_cost_fer']
merged_data['net_benefits_mana'] = merged_data['production_benefit_mana'] - merged_data['irri_cost_mana'] - merged_data['fer_cost_mana']
# Step 9: Remove duplicates based on geoid and scenario
merged_data.drop_duplicates(subset=['geoid', 'scenario'], inplace=True)
# Step 10: Keep relevant columns
columns_to_keep = ['geoid', 'scenario', 'production_benefit_irri','production_benefit_lnfer','production_benefit_mana',
'irri_cost_irri','irri_cost_mana','fer_cost_fer','fer_cost_mana',
'net_benefits_irri', 'net_benefits_fer',
'net_benefits_mana', 'state1', 'production_obs']
final_data = merged_data[columns_to_keep]
print(final_data)
# Save to CSV
final_data.to_csv('maize_cost_benefit_county.csv', index=False)
geoid scenario production_benefit_irri production_benefit_lnfer \ 0 48393 3.0 0.089430 0.568409 1 48393 1.5 -0.251293 0.170940 2 48195 3.0 5.779283 12.817246 3 48195 1.5 -1.242042 4.431190 4 48233 3.0 0.546965 3.889810 ... ... ... ... ... 4185 41059 3.0 0.016020 0.016020 4186 41045 1.5 0.430206 0.430206 4187 41045 3.0 0.361436 0.361436 4188 41049 1.5 0.076315 0.076315 4189 41049 3.0 0.162405 0.162405 production_benefit_mana irri_cost_irri irri_cost_mana fer_cost_fer \ 0 0.748957 0.000000 0.0 0.937968 1 0.184301 0.000000 0.0 0.205265 2 16.837568 1.522186 0.0 12.006741 3 4.829888 1.522186 0.0 3.092772 4 5.185463 0.000000 0.0 6.506429 ... ... ... ... ... 4185 0.016020 0.000000 0.0 0.000000 4186 0.430206 0.000000 0.0 0.000000 4187 0.361436 0.000000 0.0 0.000000 4188 0.076315 0.000000 0.0 0.000000 4189 0.162405 0.000000 0.0 0.000000 fer_cost_mana net_benefits_irri net_benefits_fer net_benefits_mana \ 0 0.937968 0.089430 -0.369560 -0.189011 1 0.205265 -0.251293 -0.034325 -0.020963 2 12.006741 4.257097 0.810505 4.830827 3 3.092772 -2.764228 1.338418 1.737116 4 6.506429 0.546965 -2.616619 -1.320966 ... ... ... ... ... 4185 0.000000 0.016020 0.016020 0.016020 4186 0.000000 0.430206 0.430206 0.430206 4187 0.000000 0.361436 0.361436 0.361436 4188 0.000000 0.076315 0.076315 0.076315 4189 0.000000 0.162405 0.162405 0.162405 state1 production_obs 0 Texas 1.271143e+07 1 Texas 1.271143e+07 2 Texas 2.893905e+08 3 Texas 2.893905e+08 4 Texas 8.392635e+07 ... ... ... 4185 Oregon 3.779036e+07 4186 Oregon 8.652065e+07 4187 Oregon 8.652065e+07 4188 Oregon 3.742098e+07 4189 Oregon 3.742098e+07 [4190 rows x 14 columns]
# Summarize columns
print(final_data[['production_benefit_irri', 'net_benefits_irri', 'net_benefits_fer', 'net_benefits_mana']].sum())
# Conditional Summaries
scenario_1_5 = final_data[final_data['scenario'] == 1.5]
scenario_3 = final_data[final_data['scenario'] == 3]
# Net benefits for scenario 1.5
print(scenario_1_5['net_benefits_irri'].sum())
print(scenario_1_5[scenario_1_5['net_benefits_irri'] >= 0]['net_benefits_irri'].sum())
print(scenario_1_5[scenario_1_5['net_benefits_fer'] >= 0]['net_benefits_fer'].sum())
print(scenario_1_5[scenario_1_5['net_benefits_mana'] >= 0]['net_benefits_mana'].sum())
# Net benefits for scenario 3
print(scenario_3['net_benefits_irri'].sum())
print(scenario_3[scenario_3['net_benefits_irri'] >= 0]['net_benefits_irri'].sum())
print(scenario_3[scenario_3['net_benefits_fer'] >= 0]['net_benefits_fer'].sum())
print(scenario_3[scenario_3['net_benefits_mana'] >= 0]['net_benefits_mana'].sum())
# Tabstat equivalent for production_obs by scenario
print(final_data.groupby('scenario')['production_obs'].sum())
# Tabstat equivalent for positive net benefits under scenario 1.5 and 3
print(scenario_1_5[scenario_1_5['net_benefits_mana'] >= 0]['production_obs'].sum())
print(scenario_3[scenario_3['net_benefits_mana'] >= 0]['production_obs'].sum())
print(scenario_1_5[scenario_1_5['net_benefits_fer'] >= 0]['production_obs'].sum())
print(scenario_3[scenario_3['net_benefits_fer'] >= 0]['production_obs'].sum())
print(scenario_1_5[scenario_1_5['net_benefits_irri'] <= 0]['production_obs'].sum())
print(scenario_3[scenario_3['net_benefits_irri'] <= 0]['production_obs'].sum())
production_benefit_irri 4690.771517 net_benefits_irri -2101.911589 net_benefits_fer 1711.482809 net_benefits_mana -110.180353 dtype: float64 -932.4413983692078 1099.5001942062543 923.8257195169043 1132.558445295366 -1169.4701910642016 1548.1302568316698 1203.1421144872845 1519.6743318418678 scenario 1.5 3.351794e+11 3.0 3.351794e+11 Name: production_obs, dtype: float64 285460701044.7105 280127761233.98975 314417185730.89453 301393182706.6963 109800349474.69122 132852328111.3526
# For scenario 1.5 with positive net_benefits_mana
print((scenario_1_5[scenario_1_5['net_benefits_mana'] >= 0]['production_obs'].sum()) / (scenario_1_5['production_obs'].sum()) * 100)
# For scenario 3 with positive net_benefits_mana
print((scenario_3[scenario_3['net_benefits_mana'] >= 0]['production_obs'].sum()) / (scenario_3['production_obs'].sum()) * 100)
# For scenario 1.5 with positive net_benefits_fer
print((scenario_1_5[scenario_1_5['net_benefits_fer'] >= 0]['production_obs'].sum()) / (scenario_1_5['production_obs'].sum()) * 100)
# For scenario 3 with positive net_benefits_fer
print((scenario_3[scenario_3['net_benefits_fer'] >= 0]['production_obs'].sum()) / (scenario_3['production_obs'].sum()) * 100)
# For scenario 1.5 with negative or zero net_benefits_irri
print((scenario_1_5[scenario_1_5['net_benefits_irri'] <= 0]['production_obs'].sum()) / (scenario_1_5['production_obs'].sum()) * 100)
# For scenario 3 with negative or zero net_benefits_irri
print((scenario_3[scenario_3['net_benefits_irri'] <= 0]['production_obs'].sum()) / (scenario_3['production_obs'].sum()) * 100)
85.16655012521316 83.57547967648838 93.80565139362068 89.91996974871512 32.75868423596968 39.63618956983722
# Group by state1 and scenario and sum the relevant columns
state_scenario_sum = merged_data.groupby(['state1', 'scenario']).agg(
production_benefit_irri_t=('production_benefit_irri', 'sum'),
production_benefit_lnfer_t=('production_benefit_lnfer', 'sum'),
production_benefit_mana_t=('production_benefit_mana', 'sum'),
irri_cost_irri_t=('irri_cost_irri', 'sum'),
irri_cost_mana_t=('irri_cost_mana', 'sum'),
fer_cost_fer_t=('fer_cost_fer', 'sum'),
fer_cost_mana_t=('fer_cost_mana', 'sum'),
net_benefits_irri_t=('net_benefits_irri', 'sum'),
net_benefits_lnfer_t=('net_benefits_fer', 'sum'),
net_benefits_mana_t=('net_benefits_mana', 'sum')
).reset_index()
# Keep only relevant columns
final_data = state_scenario_sum[['state1', 'scenario',
'production_benefit_irri_t', 'irri_cost_irri_t', 'net_benefits_irri_t',
'production_benefit_lnfer_t', 'fer_cost_fer_t', 'net_benefits_lnfer_t',
'production_benefit_mana_t', 'irri_cost_mana_t', 'fer_cost_mana_t', 'net_benefits_mana_t']]
# Sort by scenario and state1
final_data = final_data.sort_values(by=['scenario', 'state1'])
# Drop duplicates if needed (unlikely needed with groupby, but included for safety)
final_data.drop_duplicates(subset=['scenario', 'state1'], inplace=True)
# Reorder the columns as specified
final_data = final_data[['scenario', 'state1',
'production_benefit_irri_t', 'irri_cost_irri_t', 'net_benefits_irri_t',
'production_benefit_lnfer_t', 'fer_cost_fer_t', 'net_benefits_lnfer_t',
'production_benefit_mana_t', 'irri_cost_mana_t', 'fer_cost_mana_t', 'net_benefits_mana_t']]
# Print or save the final results
final_data.to_csv('cost_benefit_state_maize.csv', index=False)
# Create new column yield_change_warming (kg/ha)
soybean_yield_gain['yield_change_warming'] = soybean_yield_gain['gap_yield_warming'] * 1000
# Generate gap_fer_mana column
off_soybean_mana['gap_fer_mana'] = off_soybean_mana['gap_fer'] * off_soybean_mana['har']
# Generate gap_fer1 column (replace gap_fer with this value)
off_soybean_mana['gap_fer1'] = (np.exp(off_soybean_mana['lnfer_need']) - np.exp(off_soybean_mana['lnfer'])) * off_soybean_mana['har']
off_soybean_mana['gap_fer'] = off_soybean_mana['gap_fer1']
print(off_soybean_mana)
# Generate gap_irri_mana column
off_soybean_mana['gap_irri_mana'] = off_soybean_mana['gap_irrigation'] * off_soybean_mana['har'] * 1000
# Generate gap_irri column
off_soybean_mana['gap_irri'] = (off_soybean_mana['irri_need'] - off_soybean_mana['irrigation1']) * off_soybean_mana['har'] * 1000
# Generate yield_warming column
off_soybean_mana['yield_warming'] = np.exp(off_soybean_mana['y_temp_base_t'] - off_soybean_mana['y_temp_base2015'] + off_soybean_mana['lnyield_obs'])
# Generate yield_change_warming column
off_soybean_mana['yield_change_warming'] = off_soybean_mana['yield_warming'] - np.exp(off_soybean_mana['lnyield_obs'])
# Sort by yield_change_warming
off_soybean_mana = off_soybean_mana.sort_values(by='yield_change_warming')
# Generate production_offset column for negative yield_change_warming
off_soybean_mana['production_offset'] = np.where(off_soybean_mana['yield_change_warming'] < 0,
-off_soybean_mana['yield_change_warming'] * off_soybean_mana['har'], np.nan)
# Append soybean_yield_gain data
merged_data = pd.concat([off_soybean_mana, soybean_yield_gain], ignore_index=True)
# Generate production_gain_irri column for positive yield_change_warming
merged_data['production_gain_irri'] = np.where(merged_data['yield_change_warming'] > 0,
merged_data['yield_change_warming'] * merged_data['har'], np.nan)
# Replace production_gain_irri for negative yield_change_warming
merged_data['production_gain_irri'] = np.where(merged_data['yield_change_warming'] < 0,
(np.exp(merged_data['y_temp_irri'] - merged_data['y_temp_base_t'] + merged_data['lnyield_obs']) -
np.exp(merged_data['lnyield_obs'])) * merged_data['har'],
merged_data['production_gain_irri'])
# Generate production_gain_lnfer column for positive yield_change_warming
merged_data['production_gain_lnfer'] = np.where(merged_data['yield_change_warming'] > 0,
merged_data['yield_change_warming'] * merged_data['har'], np.nan)
# Replace production_gain_lnfer for negative yield_change_warming
merged_data['production_gain_lnfer'] = np.where(merged_data['yield_change_warming'] < 0,
merged_data['production_offset'], merged_data['production_gain_lnfer'])
# Generate production_gain_mana column for positive yield_change_warming
merged_data['production_gain_mana'] = np.where(merged_data['yield_change_warming'] > 0,
merged_data['yield_change_warming'] * merged_data['har'], np.nan)
# Replace production_gain_mana for negative yield_change_warming
merged_data['production_gain_mana'] = np.where(merged_data['yield_change_warming'] < 0,
(np.exp(merged_data['y_temp'] - merged_data['y_temp_base_t'] + merged_data['lnyield_obs']) -
np.exp(merged_data['lnyield_obs'])) * merged_data['har'],
merged_data['production_gain_mana'])
# Generate production_obs column
merged_data['production_obs'] = np.exp(merged_data['lnyield_obs']) * merged_data['har']
# Keep only the required columns
final_columns = ['scenario', 'geoid', 'production_gain_irri', 'production_gain_lnfer', 'production_gain_mana',
'gap_fer', 'gap_fer_mana', 'gap_irri', 'gap_irri_mana', 'production_obs']
final_data = merged_data[final_columns]
print(final_data)
tmp pre lnfer irrigation1 y_temp mana_group geoid \ 0 22.760628 5.757780 2.038479 0.001818 7.979504 1 39017 1 27.027358 6.635976 2.035268 0.126456 7.761395 1 13043 2 25.319158 7.937325 1.236794 0.000507 7.905103 1 37177 3 24.492824 5.942823 0.295453 0.000797 7.711970 1 17065 4 21.297634 6.267068 -0.046396 0.001389 7.763110 1 17067 ... ... ... ... ... ... ... ... 1993 25.723312 6.718675 4.369129 0.149384 8.278050 3 1043 1994 26.457528 6.493699 2.301965 0.027715 7.966898 3 1075 1995 27.248052 6.264505 3.149844 0.318468 7.910322 3 1085 1996 26.679214 6.355030 4.096728 0.210819 8.090918 3 1087 1997 27.222396 6.256731 2.847938 0.009984 7.908306 3 1091 scenario lnyield_obs scenario.1 ... gap_irrigation lnfer_need \ 0 3.0 7.994525 3.0 ... -0.001794 2.194755 1 3.0 7.499042 3.0 ... -0.126432 3.559168 2 3.0 7.918210 3.0 ... -0.000484 2.452045 3 3.0 7.946740 3.0 ... -0.000773 1.039118 4 1.5 8.213926 1.5 ... -0.001366 0.062503 ... ... ... ... ... ... ... 1993 3.0 7.978103 3.0 ... 0.000000 6.095648 1994 3.0 7.625159 3.0 ... 0.000000 4.401289 1995 3.0 7.784370 3.0 ... 0.000000 5.336927 1996 3.0 7.847030 3.0 ... 0.000000 5.883013 1997 3.0 7.563227 3.0 ... 0.000000 4.931680 lnfer_values irri_need y_temp_base2015 y_temp_base_t irri_need_1 \ 0 2.194328 0.837454 7.979504 7.958094 0.837454 1 3.458172 0.837454 7.761395 7.561807 0.837454 2 2.451664 0.837454 7.905103 7.738559 0.837454 3 1.039221 NaN 7.711970 7.610054 0.837454 4 0.063104 0.235000 7.763110 7.748190 0.235000 ... ... ... ... ... ... 1993 6.095648 0.837454 8.278050 8.054614 0.837454 1994 4.401289 0.837454 7.966898 7.681917 0.837454 1995 5.336927 0.837454 7.910322 7.639415 0.837454 1996 5.883013 0.837454 8.090918 7.862778 0.837454 1997 4.931680 0.837454 7.908306 7.623575 0.837454 y_temp_irri gap_fer_mana gap_fer1 0 7.938183 1.911200e+04 1.916854e+04 1 7.540154 3.221574e+04 3.672656e+04 2 7.721508 1.046408e+05 1.046974e+05 3 7.647000 5.554885e+04 5.553796e+04 4 7.763084 6.456998e+03 6.419590e+03 ... ... ... ... 1993 7.957525 1.060625e+06 1.060625e+06 1994 7.649035 4.036800e+04 4.036800e+04 1995 7.608856 3.136511e+05 3.136511e+05 1996 7.798237 1.787960e+05 1.787960e+05 1997 7.580939 1.915121e+05 1.915121e+05 [1998 rows x 22 columns] scenario geoid production_gain_irri production_gain_lnfer \ 0 3.0 20033 3.926949e+04 8.745441e+05 1 3.0 20119 0.000000e+00 6.395546e+06 2 3.0 5043 -8.734129e+05 1.387963e+07 3 3.0 20097 0.000000e+00 5.735609e+06 4 3.0 5069 -7.151300e+04 4.271544e+07 ... ... ... ... ... 3541 3.0 55137 2.453971e+06 2.453971e+06 3542 1.5 55139 1.305732e+06 1.305732e+06 3543 3.0 55139 5.326260e+06 5.326260e+06 3544 1.5 55141 1.437986e+06 1.437986e+06 3545 3.0 55141 5.416751e+06 5.416751e+06 production_gain_mana gap_fer gap_fer_mana gap_irri \ 0 1.300079e+06 4.271775e+04 4.497173e+04 NaN 1 8.784781e+06 5.624284e+04 1.603278e+05 NaN 2 2.009612e+07 5.686259e+06 3.629138e+06 6.271727e+06 3 7.871425e+06 2.815199e+05 3.206415e+05 NaN 4 6.216062e+07 9.129002e+06 4.603164e+06 3.222641e+06 ... ... ... ... ... 3541 2.453971e+06 NaN NaN NaN 3542 1.305732e+06 NaN NaN NaN 3543 5.326260e+06 NaN NaN NaN 3544 1.437986e+06 NaN NaN NaN 3545 5.416751e+06 NaN NaN NaN gap_irri_mana production_obs 0 -3.588606e+05 2.671874e+06 1 -1.887187e+07 2.351526e+07 2 -4.890795e+06 4.486887e+07 3 -7.909319e+06 2.113825e+07 4 -3.260582e+07 1.365489e+08 ... ... ... 3541 NaN 1.499843e+07 3542 NaN 4.461606e+07 3543 NaN 4.461606e+07 3544 NaN 1.845907e+07 3545 NaN 1.845907e+07 [3546 rows x 10 columns]
# Step 1: Merge m:1 geoid with USPS data
merged_data = pd.merge(merged_data, usps, on='geoid', how='inner') # Inner merge corresponds to "keep if _merge == 3"
# Step 2: Merge m:1 usps with region_state data
merged_data = pd.merge(merged_data, region_state, on='usps', how='inner') # Inner merge
# Step 3: Merge m:1 stateansi with soybean_price
merged_data = pd.merge(merged_data, soybean_price, on='stateansi', how='inner') # Inner merge
# Step 4: Merge m:1 stateansi with soybean_fer_price
merged_data = pd.merge(merged_data, soybean_fer_price, on='stateansi', how='inner') # Inner merge
# Step 5: Merge m:1 stateansi with irri_cost
merged_data = pd.merge(merged_data, irri_cost, on='stateansi', how='inner') # Inner merge
# Step 6: Replace missing values with 0 for specific columns
merged_data['gap_fer'].fillna(0, inplace=True)
merged_data['gap_fer_mana'].fillna(0, inplace=True)
merged_data['gap_irri'].fillna(0, inplace=True)
merged_data['gap_irri_mana'].fillna(0, inplace=True)
# Step 7: Generate columns for production benefits and costs
merged_data['production_benefit_irri'] = merged_data['production_gain_irri'] * merged_data['soybean_price'] / 1000000
merged_data['production_benefit_lnfer'] = merged_data['production_gain_lnfer'] * merged_data['soybean_price'] / 1000000
merged_data['production_benefit_mana'] = merged_data['production_gain_mana'] * merged_data['soybean_price'] / 1000000
merged_data['irri_cost_irri'] = merged_data['gap_irri'] * merged_data['irri_price'] / 1000000
merged_data['irri_cost_mana'] = merged_data['gap_irri_mana'] * merged_data['irri_price'] / 1000000
merged_data['fer_cost_fer'] = merged_data['gap_fer'] / 1000 * merged_data['fer_price'] / 1000000
merged_data['fer_cost_mana'] = merged_data['gap_fer_mana'] / 1000 * merged_data['fer_price'] / 1000000
# Step 8: Generate columns for net benefits
merged_data['net_benefits_irri'] = merged_data['production_benefit_irri'] - merged_data['irri_cost_irri']
merged_data['net_benefits_fer'] = merged_data['production_benefit_lnfer'] - merged_data['fer_cost_fer']
merged_data['net_benefits_mana'] = merged_data['production_benefit_mana'] - merged_data['irri_cost_mana'] - merged_data['fer_cost_mana']
# Step 9: Remove duplicates based on geoid and scenario
merged_data.drop_duplicates(subset=['geoid', 'scenario'], inplace=True)
# Step 10: Keep relevant columns
columns_to_keep = ['geoid', 'scenario', 'production_benefit_irri','production_benefit_lnfer','production_benefit_mana',
'irri_cost_irri','irri_cost_mana','fer_cost_fer','fer_cost_mana',
'net_benefits_irri', 'net_benefits_fer',
'net_benefits_mana', 'state1', 'production_obs']
final_data = merged_data[columns_to_keep]
print(final_data)
# Save to CSV
final_data.to_csv('soybean_cost_benefit_county.csv', index=False)
geoid scenario production_benefit_irri production_benefit_lnfer \ 0 20033 3.0 0.014743 0.328336 1 20033 1.5 0.010457 0.077034 2 20119 3.0 0.000000 2.401122 3 20119 1.5 0.000000 0.508782 4 20097 3.0 0.000000 2.153357 ... ... ... ... ... 3541 55137 3.0 0.930535 0.930535 3542 55139 1.5 0.495128 0.495128 3543 55139 3.0 2.019695 2.019695 3544 55141 1.5 0.545278 0.545278 3545 55141 3.0 2.054009 2.054009 production_benefit_mana irri_cost_irri irri_cost_mana fer_cost_fer \ 0 0.488097 0.0 -0.012904 0.082583 1 0.083442 0.0 -0.012904 0.003784 2 3.298127 0.0 -0.678617 0.108730 3 0.539896 0.0 -0.678617 0.009765 4 2.955220 0.0 -0.284413 0.544241 ... ... ... ... ... 3541 0.930535 0.0 0.000000 0.000000 3542 0.495128 0.0 0.000000 0.000000 3543 2.019695 0.0 0.000000 0.000000 3544 0.545278 0.0 0.000000 0.000000 3545 2.054009 0.0 0.000000 0.000000 fer_cost_mana net_benefits_irri net_benefits_fer net_benefits_mana \ 0 0.086940 0.014743 0.245753 0.414061 1 0.004381 0.010457 0.073250 0.091965 2 0.309950 0.000000 2.292392 3.666795 3 0.031092 0.000000 0.499017 1.187421 4 0.619872 0.000000 1.609116 2.619761 ... ... ... ... ... 3541 0.000000 0.930535 0.930535 0.930535 3542 0.000000 0.495128 0.495128 0.495128 3543 0.000000 2.019695 2.019695 2.019695 3544 0.000000 0.545278 0.545278 0.545278 3545 0.000000 2.054009 2.054009 2.054009 state1 production_obs 0 Kansas 2.671874e+06 1 Kansas 2.671874e+06 2 Kansas 2.351526e+07 3 Kansas 2.351526e+07 4 Kansas 2.113825e+07 ... ... ... 3541 Wisconsin 1.499843e+07 3542 Wisconsin 4.461606e+07 3543 Wisconsin 4.461606e+07 3544 Wisconsin 1.845907e+07 3545 Wisconsin 1.845907e+07 [3546 rows x 14 columns]
# Summarize columns
print(final_data[['production_benefit_irri', 'net_benefits_irri', 'net_benefits_fer', 'net_benefits_mana']].sum())
# Conditional Summaries
scenario_1_5 = final_data[final_data['scenario'] == 1.5]
scenario_3 = final_data[final_data['scenario'] == 3]
# Net benefits for scenario 1.5
print(scenario_1_5['net_benefits_irri'].sum())
print(scenario_1_5[scenario_1_5['net_benefits_irri'] >= 0]['net_benefits_irri'].sum())
print(scenario_1_5[scenario_1_5['net_benefits_fer'] >= 0]['net_benefits_fer'].sum())
print(scenario_1_5[scenario_1_5['net_benefits_mana'] >= 0]['net_benefits_mana'].sum())
# Net benefits for scenario 3
print(scenario_3['net_benefits_irri'].sum())
print(scenario_3[scenario_3['net_benefits_irri'] >= 0]['net_benefits_irri'].sum())
print(scenario_3[scenario_3['net_benefits_fer'] >= 0]['net_benefits_fer'].sum())
print(scenario_3[scenario_3['net_benefits_mana'] >= 0]['net_benefits_mana'].sum())
# Tabstat equivalent for production_obs by scenario
print(final_data.groupby('scenario')['production_obs'].sum())
# Tabstat equivalent for positive net benefits under scenario 1.5 and 3
print(scenario_1_5[scenario_1_5['net_benefits_mana'] >= 0]['production_obs'].sum())
print(scenario_3[scenario_3['net_benefits_mana'] >= 0]['production_obs'].sum())
print(scenario_1_5[scenario_1_5['net_benefits_fer'] >= 0]['production_obs'].sum())
print(scenario_3[scenario_3['net_benefits_fer'] >= 0]['production_obs'].sum())
print(scenario_1_5[scenario_1_5['net_benefits_irri'] <= 0]['production_obs'].sum())
print(scenario_3[scenario_3['net_benefits_irri'] <= 0]['production_obs'].sum())
production_benefit_irri 1233.221871 net_benefits_irri -1360.621585 net_benefits_fer 4336.430577 net_benefits_mana 5327.389882 dtype: float64 -886.4326407528958 744.6502473856877 1161.5441264661172 1348.7481445275912 -474.1889441301462 2014.089530570012 3548.0385082728403 4198.88257263227 scenario 1.5 9.879430e+10 3.0 9.879430e+10 Name: production_obs, dtype: float64 96200235087.52872 97150364329.88513 98493058643.60019 95383036264.80768 27386040201.601387 30213063050.452576
# For scenario 1.5 with positive net_benefits_mana
print((scenario_1_5[scenario_1_5['net_benefits_mana'] >= 0]['production_obs'].sum()) / (scenario_1_5['production_obs'].sum()) * 100)
# For scenario 3 with positive net_benefits_mana
print((scenario_3[scenario_3['net_benefits_mana'] >= 0]['production_obs'].sum()) / (scenario_3['production_obs'].sum()) * 100)
# For scenario 1.5 with positive net_benefits_fer
print((scenario_1_5[scenario_1_5['net_benefits_fer'] >= 0]['production_obs'].sum()) / (scenario_1_5['production_obs'].sum()) * 100)
# For scenario 3 with positive net_benefits_fer
print((scenario_3[scenario_3['net_benefits_fer'] >= 0]['production_obs'].sum()) / (scenario_3['production_obs'].sum()) * 100)
# For scenario 1.5 with negative or zero net_benefits_irri
print((scenario_1_5[scenario_1_5['net_benefits_irri'] <= 0]['production_obs'].sum()) / (scenario_1_5['production_obs'].sum()) * 100)
# For scenario 3 with negative or zero net_benefits_irri
print((scenario_3[scenario_3['net_benefits_irri'] <= 0]['production_obs'].sum()) / (scenario_3['production_obs'].sum()) * 100)
97.3742766504422 98.33600140729659 99.69508215633745 96.54710461526315 27.72026339150363 30.58178763550045
# Group by state1 and scenario and sum the relevant columns
state_scenario_sum = merged_data.groupby(['state1', 'scenario']).agg(
production_benefit_irri_t=('production_benefit_irri', 'sum'),
production_benefit_lnfer_t=('production_benefit_lnfer', 'sum'),
production_benefit_mana_t=('production_benefit_mana', 'sum'),
irri_cost_irri_t=('irri_cost_irri', 'sum'),
irri_cost_mana_t=('irri_cost_mana', 'sum'),
fer_cost_fer_t=('fer_cost_fer', 'sum'),
fer_cost_mana_t=('fer_cost_mana', 'sum'),
net_benefits_irri_t=('net_benefits_irri', 'sum'),
net_benefits_lnfer_t=('net_benefits_fer', 'sum'),
net_benefits_mana_t=('net_benefits_mana', 'sum')
).reset_index()
# Keep only relevant columns
final_data = state_scenario_sum[['state1', 'scenario',
'production_benefit_irri_t', 'irri_cost_irri_t', 'net_benefits_irri_t',
'production_benefit_lnfer_t', 'fer_cost_fer_t', 'net_benefits_lnfer_t',
'production_benefit_mana_t', 'irri_cost_mana_t', 'fer_cost_mana_t', 'net_benefits_mana_t']]
# Sort by scenario and state1
final_data = final_data.sort_values(by=['scenario', 'state1'])
# Drop duplicates if needed (unlikely needed with groupby, but included for safety)
final_data.drop_duplicates(subset=['scenario', 'state1'], inplace=True)
# Reorder the columns as specified
final_data = final_data[['scenario', 'state1',
'production_benefit_irri_t', 'irri_cost_irri_t', 'net_benefits_irri_t',
'production_benefit_lnfer_t', 'fer_cost_fer_t', 'net_benefits_lnfer_t',
'production_benefit_mana_t', 'irri_cost_mana_t', 'fer_cost_mana_t', 'net_benefits_mana_t']]
# Print or save the final results
final_data.to_csv('cost_benefit_state_soybean.csv', index=False)