This notebook presents the nitrate database compilation. It is centered around Codispoti et al.'s 2013 compilation of historical nitrate measurements in the Arctic Ocean.
All figures exported from this notebook are prefixed with FIGURE_NO3-COMP_.
%run imports.py
%load_ext autoreload
%autoreload 2
# Define dimensions
dims = dict(
    reg_name=hv.Dimension('reg_name', label='Region'),
    ntr0=hv.Dimension('ntr0', label='Surface NO₃ conc.', range=(0, 13)),
    doy=hv.Dimension('doy', label='Day of the year'),
)
First, download the largest database of vertical profiles of nitrate concentrations in the Arctic known to us: the one compiled by Codispoti and collaborators and published in 2013: http://dx.doi.org/10.1016/j.pocean.2012.11.006
!wget -c --read-timeout=5 -P ../data/no3-compilation/ "https://www.nodc.noaa.gov/archive/arc0034/0072133/1.1/data/0-data/Codispoti_Arctic_Nutrients_Submission_11-11-2010.csv"
def load_codispotietal():
    """
    Load the Codispoti et al. (2013) nutrient compilation.
    """
    renamedict = dict(NO3='nitrate', Sal='sal',
                      T='temp', z='depth', Longitude='lon',
                      Latitude='lat', Date='date', Station='station', Cruise='cruise')
    df = (pd.read_csv('../data/no3-compilation/Codispoti_Arctic_Nutrients_Submission_11-11-2010.csv',
                      na_values=-999,
                      parse_dates=['Date'], dtype={'Station': str},
                      )[['Date', 'Latitude', 'Longitude', 'NO3', 'z', 'T', 'Sal', 'Station', 'Cruise']]
          .rename(columns=renamedict)
          .dropna(subset=['nitrate'])
          )
    # make station identifiers unique across cruises
    df = df.assign(station=lambda row: row.cruise+'+'+row.station.astype(str))
    df = df.drop(columns=['cruise'])
    # average replicate samples taken at the same station, date, and depth
    df = df.groupby(['station', 'date', 'depth']).mean().reset_index()
    # derive TEOS-10 quantities
    df['p'] = gsw.p_from_z(-df.depth, df.lat)
    df['SA'] = gsw.SA_from_SP(df.sal, df.p, df.lon, df.lat)
    df['CT'] = gsw.CT_from_pt(df.SA, df.temp)
    df['sigth'] = gsw.sigma0(df.SA, df.CT)
    df = df.assign(database='codispoti')
    return df
These Canadian Arctic data were mostly collected during cruises of ArcticNet, DFO, and others, and are treated in more detail by Pierre Coupel et al., 2019 (in prep.).
def load_canadian_arctic():
    """
    Load the Canadian Arctic data (ArcticNet, DFO, and others; Coupel et al., in prep.).
    """
    fname = '../data/no3-compilation/NUT-ArcticNet_NOW_CATS_Coupel_reduced.csv'
    if os.path.exists(fname):
        df = (
            pd.read_csv(fname, dtype={'Station': str})
            .rename(
                columns=dict(
                    Station='station', Depth='depth', Longitude='lon', Latitude='lat',
                    Nitrate='nitrate', Temperature='temp', Salinity='sal'
                )
            )
            .dropna(subset=['nitrate'])
        )
        # where the day of the month is missing, assume mid-month
        df.Day = df.Day.fillna(15)
        df['date'] = pd.to_datetime(df.Year*10000 + df.Month*100 + df.Day, format='%Y%m%d')
        df = df.assign(station=lambda row: row.station.astype(str))
        # average replicate samples taken at the same station, date, and depth
        df = df.groupby(['station', 'date', 'depth']).mean().reset_index()
        # derive TEOS-10 quantities
        df['p'] = gsw.p_from_z(-df.depth, df.lat)
        df['SA'] = gsw.SA_from_SP(df.sal, df.p, df.lon, df.lat)
        df['CT'] = gsw.CT_from_pt(df.SA, df.temp)
        df['sigth'] = gsw.sigma0(df.SA, df.CT)
        df = df.drop(columns=['Year', 'Month', 'Day'])
        df = df.assign(database='arcticnet')
        # conform longitudes to the interval [-180, 180]
        df['lon'] = df.lon.where(df.lon <= 180, df.lon - 360)
        return df
    else:
        print('Data not available locally')
        return pd.DataFrame(columns=[
            'station', 'date', 'depth', 'lon', 'lat',
            'temp', 'sal', 'nitrate', 'p',
            'SA', 'CT', 'sigth', 'database'
        ])
Winter data are always hard to come by, so let's get some more pre-bloom nitrate concentrations from the Chukchi Sea, measured by Arrigo and collaborators and published at https://agupubs.onlinelibrary.wiley.com/doi/abs/10.1002/2017JG003881. The data are downloaded from https://datadryad.org/stash/dataset/doi:10.5061/dryad.fm7b5
!wget -c --read-timeout=5 -O ../data/no3-compilation/SUBICE_high_NO3_hy1.csv "https://datadryad.org/stash/downloads/file_stream/29860"
def load_arrigoetal():
    """
    Load pre-bloom Chukchi Sea (SUBICE) data from Arrigo and collaborators.
    """
    df = pd.read_csv('../data/no3-compilation/SUBICE_high_NO3_hy1.csv',
                     na_values=-999, skiprows=9, header=[0, 1],
                     dtype={'STNNBR': str},
                     ).replace(to_replace=-999, value=np.nan)
    # keep only the first header row (parameter names)
    df.columns = df.columns.droplevel(1)
    # keep the upper 10 m and average per station
    df = df.loc[df.BTLDPTH <= 10].groupby('STNNBR').mean().reset_index()
    df.DATE = df.DATE.apply(lambda x: pd.to_datetime(str(x), format='%Y%m%d'))
    renamedict = dict(
        DATE='date', LATITUDE='lat', LONGITUDE='lon', NITRAT='nitrate', DEPTH='depth',
        STNNBR='station', CTDTMP='temp', CTDSAL='sal',
    )
    df = df.rename(columns=renamedict)
    # derive TEOS-10 quantities
    df['p'] = gsw.p_from_z(-df.depth, df.lat)
    df['SA'] = gsw.SA_from_SP(df.sal, df.p, df.lon, df.lat)
    df['CT'] = gsw.CT_from_pt(df.SA, df.temp)
    df['sigth'] = gsw.sigma0(df.SA, df.CT)
    return df[list(renamedict.values()) + ['p', 'SA', 'CT', 'sigth']].assign(database='arcticnet')
Check the Canadian Arctic data for duplicates against the Codispoti et al. compilation. First, bin the longitudes and latitudes for comparison, then check that no profiles overlap.
def get_timelatlon_bin(df):
    """
    Bin longitudes (0.1°) and latitudes (0.05°) and keep one representative
    entry for each (date, lonbin, latbin) profile.
    """
    df['lonbin'] = pd.to_numeric(pd.cut(df.lon, np.arange(-180, 180, 0.1), labels=0.05 + np.arange(-180, 180, 0.1)[:-1]))
    df['latbin'] = pd.to_numeric(pd.cut(df.lat, np.arange(60, 90, 0.05), labels=0.025 + np.arange(60, 90, 0.05)[:-1]))
    # one representative entry for each profile
    return df.groupby(['date', 'lonbin', 'latbin']).first()
stns_canadian_arctic = get_timelatlon_bin(load_canadian_arctic())
stns_codispotietal = get_timelatlon_bin(load_codispotietal())
canadian_arctic_stations_to_merge = stns_canadian_arctic.station.loc[
    stns_canadian_arctic.index
    .drop(
        stns_codispotietal.index,
        errors='ignore',
    )
].values
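As an optional sanity check (an addition using the binned indices built above), we can count how many Canadian Arctic profiles fall into a (date, lonbin, latbin) bin already occupied by the Codispoti et al. compilation; these are exactly the profiles excluded from the merge below.
# Optional check: profiles sharing a (date, lonbin, latbin) bin with
# Codispoti et al. are the ones excluded from the merge.
overlap = stns_canadian_arctic.index.intersection(stns_codispotietal.index)
print(f'{len(overlap)} binned Canadian Arctic profiles coincide with Codispoti et al.')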
Now we can select those data from the Canadian Arctic that do not duplicate any data already in the Codispoti et al. database.
canadian_arctic = load_canadian_arctic()
canadian_arctic_to_merge = canadian_arctic.loc[canadian_arctic.station.isin(canadian_arctic_stations_to_merge)]
This step drops the following number of records:
len(canadian_arctic) - len(canadian_arctic_to_merge)
dfl = [
    load_codispotietal(),
    load_arrigoetal(),
    canadian_arctic_to_merge
]
df = pd.concat(dfl, sort=False).reset_index()
df.to_pickle('../data/no3-compilation/tmp_compiled.pandas')
df = pd.read_pickle('../data/no3-compilation/tmp_compiled.pandas')
def groupwise_interp(df):
    """
    Interpolate one profile onto a regular 2 m depth grid (0-300 m).
    Profiles whose shallowest sample is deeper than 20 m are discarded.
    """
    if df.depth.min() > 20:
        return None
    else:
        bins = np.arange(-1, 301.1, 2)
        labels = bins[:-1] + np.diff(bins)/2
        # snap each sample to the centre of its 2 m depth bin
        df.depth = pd.to_numeric(pd.cut(df.depth, bins=bins, labels=labels))
        df = (df
              .dropna(subset=['depth'])
              .groupby('depth').mean()
              .reindex(index=labels)
              )
        # interpolate between sampled depths and backfill towards the surface
        return df.interpolate(method='linear', limit_area='inside').fillna(method='bfill').drop(
            columns=['station', 'date', 'depth'], errors='ignore')
df = (df.groupby(['database', 'station', 'date'])
      .apply(groupwise_interp)
      .reset_index()
      )
df.to_pickle('../data/no3-compilation/tmp_compiled-interpolated.pandas')
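To make the gridding above concrete, here is a toy illustration (synthetic depths, not part of the compilation) of how pd.cut snaps raw sampling depths onto the 2 m bin centres before interpolation:
# Toy illustration (synthetic depths): the 2 m binning used in groupwise_interp.
bins = np.arange(-1, 301.1, 2)            # bin edges at -1, 1, 3, ..., 301 m
labels = bins[:-1] + np.diff(bins) / 2    # bin centres at 0, 2, 4, ..., 300 m
raw_depths = pd.Series([2.7, 9.8, 24.5])
print(pd.to_numeric(pd.cut(raw_depths, bins=bins, labels=labels)))
# -> 2.0, 10.0 and 24.0 m, respectively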
In the following, we derive a number of quantities for each profile. Not all of them made it into the final article, but we left them in here for further analyses that might pop up in the future.
df = pd.read_pickle('../data/no3-compilation/tmp_compiled-interpolated.pandas')
df = df.loc[df.database.isin(['codispoti', 'arcticnet'])]
def find_ml(sigth, depth, delta_sigth_crit=0.1):
    """
    Find the mixed layer depth with a (surface density + 0.1 kg/m3) criterion.
    """
    # surface reference: mean of the five shallowest samples (≈ upper 10 m on the 2 m grid)
    index = np.where(sigth > np.nanmean(sigth.iloc[:5]) + delta_sigth_crit)[0]
    if len(index) > 0:
        return depth.iloc[index[0]].astype(float)
    else:
        return np.nan
df = df.merge(pd.DataFrame(
    df.groupby(df.station).apply(lambda g: find_ml(g.sigth, g.depth)),
    columns=['mld']), on='station')
There are two relevant nitraclines:
- nc0: biologically meaningful; the depth where NO₃ first exceeds, say, 1 µM, as it indicates the zone of nitrate depletion (if there is one)
- nc: physically meaningful; the depth where NO₃ first exceeds, say, surface NO₃ + 1 µM, as it indicates water mass transitions
def find_nitracline0(no3, depth, no3crit=1.0):
    """
    Depth where NO3 first reaches the fixed threshold `no3crit` (µM).
    """
    index = np.where(no3 >= no3crit)[0]
    if len(index) > 0:
        return depth.iloc[index[0]].astype(float)
    else:
        return np.nan
def find_nitracline(no3, depth):
    """
    Depth where NO3 first exceeds the surface (upper 10 m) concentration by 1 µM.
    """
    no3sfc = no3.loc[depth <= 10].mean()
    index = np.where(no3 > no3sfc + 1.)[0]
    if len(index) > 0:
        return depth.iloc[index[0]].astype(float)
    else:
        return np.nan
gb = df.groupby(df.station)
df = df.merge(pd.DataFrame(
    dict(nc0=gb.apply(lambda g: find_nitracline0(g.nitrate, g.depth, 1)),
         nc=gb.apply(lambda g: find_nitracline(g.nitrate, g.depth))
         )), on='station')
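As a toy illustration (synthetic values, not part of the compilation) of how the two definitions differ, consider a profile whose surface NO₃ already exceeds 1 µM: nc0 then sits at the surface, while nc marks the first depth where NO₃ rises 1 µM above the surface value.
# Toy illustration (synthetic values): how nc0 and nc differ for a profile
# whose surface NO3 is already above 1 µM.
toy_no3 = pd.Series([3.0, 3.1, 3.2, 5.0, 9.0])
toy_depth = pd.Series([0.0, 10.0, 20.0, 30.0, 40.0])
print(find_nitracline0(toy_no3, toy_depth))  # 0.0 m: NO3 >= 1 µM right at the surface
print(find_nitracline(toy_no3, toy_depth))   # 30.0 m: first depth with NO3 > surface NO3 + 1 µM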
# 40-100 m density difference
sigth0 = df.loc[df.depth.between(35, 45)].groupby('station').mean().sigth
sigth_deep = df.loc[df.depth.between(95, 105)].groupby('station').mean().sigth
df = df.merge(pd.DataFrame(
    dict(delta_sigth=(sigth_deep - sigth0).values, station=sigth0.index.values)
))
def buoyancy_freq(sigma, depth, from_depth, layer_thickness=30):
    """
    Calculate buoyancy frequency
    over depth range [from_depth, from_depth+layer_thickness].
    """
    nc_depth_range = (from_depth <= depth) & (depth <= from_depth + layer_thickness)
    if not np.any(nc_depth_range):
        return np.nan
    else:
        def slope(x, y, x_range):
            return np.polyfit(x.loc[x_range], y.loc[x_range].sort_values(), 1)[0]
        return 9.81/(1e3 + np.mean(sigma)) * slope(depth, sigma, nc_depth_range)
gb = df.groupby(df.station)
df = df.merge(pd.DataFrame(
    dict(
        N2_30_60=gb.apply(lambda g: buoyancy_freq(g.sigth, g.depth, g.mld, 30))
    )),
    on='station')
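For reference, buoyancy_freq approximates $N^2 \approx \frac{g}{\rho}\,\frac{\mathrm{d}\sigma_\theta}{\mathrm{d}z}$ with $\rho \approx 1000~\mathrm{kg\,m^{-3}} + \sigma_\theta$ and the vertical gradient taken from a linear fit of density against depth; note that, despite its name, N2_30_60 is computed over the 30 m layer immediately below the mixed layer depth.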
dfp = df.copy()
df = (df
      .loc[df.depth == df.depth.min()]
      .rename(columns=dict(nitrate='ntr0'))
      )
dfp = dfp.merge(df[['station', 'ntr0']], how='outer')
df.to_pickle('../data/no3-compilation/tmp_compiled-interpolated-derived-per-station.pandas')
dfp.to_pickle('../data/no3-compilation/tmp_compiled-interpolated-derived.pandas')
The region definitions below are based on Peralta-Ferriz & Woodgate and Codispoti et al.
d = [
    ('Chukchi Sea', smoothen(box(-180, 68, -155, 76))),
    ('Southern Beaufort', smoothen(box(-155, 68, -115, 72))),
    ('Canada Basin', smoothen(box(-155, 72, -130, 84))),
    ('Makarov Basin', smoothen(box(-180, 83.5, -50, 90)).union(smoothen(box(140, 78, 180, 90)))),
    ('Eurasian Basin', smoothen(box(-30, 82, 140, 90).union(box(110, 78, 140, 82)))),
    ('Barents Sea', smoothen(box(15, 75, 60, 80).union(box(15, 70.5, 55, 75)))),
    ('Baffin Bay', smoothen(box(-65, 66, -45, 78))),
    ('Canadian Archipelago', smoothen(box(-110, 66, -65, 80))),  # .union(box(-100, 78, -50, 82))
    ('Fram Strait (East)', smoothen(box(-5, 75, 15, 81)))
]
names, geo = zip(*d)
regions = gpd.GeoDataFrame(dict(reg_name=list(names)), geometry=list(geo))
regions.crs = from_epsg(4326)
# regions = regions.to_crs(from_epsg(3413))
regions['reg_idx'] = range(len(d))
regions.to_file('../data/regions/arctic-regions.shp')
... and visualize them:
fname = '../nb_fig/FIGURE_NO3-COMP_regions'
hv.extension('bokeh')
regions = gpd.read_file('../data/regions/arctic-regions.shp')
options_bk = [
    opts.Overlay(legend_position='left'),
    opts.Polygons(projection=ccrs.NorthPolarStereo(), cmap='Category10', tools=['hover'],
                  show_legend=True,
                  frame_width=500, aspect='equal',
                  line_color=None, alpha=.7,
                  ),
]
add_backend_to_opts(options_bk, 'bokeh')
poly = gv.Polygons(regions, kdims=['Longitude', 'Latitude'], vdims=['reg_name', 'reg_idx'])
l = poly * gf.land * gf.coastline * isobath2000.relabel('2000 m isobath')
l = l.opts(*options_bk)
hv.save(l.opts(toolbar=None), fname, fmt='html')
# manually fix matplotlib legend handler so we can have a holoviews Polygons legend
from matplotlib.collections import PatchCollection
from matplotlib.legend_handler import HandlerPolyCollection
from matplotlib.legend import Legend
Legend.update_default_handler_map({PatchCollection: HandlerPolyCollection()})
hv.extension('matplotlib')
regions = gpd.read_file('../data/regions/arctic-regions.shp')
options_mpl = [
    opts.Polygons(
        projection=ccrs.NorthPolarStereo(),
        show_legend=True, color=None,
    ),
    opts.NdOverlay(show_legend=True),
]
add_backend_to_opts(options_mpl, 'matplotlib')
poly = hv.NdOverlay({
    d.reg_name: gv.Polygons(gpd.GeoDataFrame(d.to_frame().transpose()))
    for k, d in regions.iterrows()
})
l = poly * gf.land * gf.coastline
l = l.opts(*options_mpl).redim.range(Latitude=(60,90), Longitude=(-180, 180))
# render to matplotlib figure
fig = hv.render(l)
# do final adjustments directly in matplotlib
fig.set_size_inches(9, 6)
ax = fig.axes[0]
circumpolar_axis(ax)
ax.legend_.set_bbox_to_anchor((1, 1))
ax.text(0, 0.99, 'B', transform=ax.transAxes, fontdict={'size':20, 'weight': 'bold'})
for fmt in ['.png', '.pdf']:
    fig.savefig(fname + fmt)
fig
df = pd.read_pickle('../data/no3-compilation/tmp_compiled-interpolated-derived-per-station.pandas')
dfp = pd.read_pickle('../data/no3-compilation/tmp_compiled-interpolated-derived.pandas')
def season(timestamp):
    """
    Map a timestamp to 'winter' (Jan-Apr) or 'summer' (Jul-Sep); other months get no label.
    """
    mm = timestamp.month
    if mm <= 4:
        return 'winter'
    elif mm >= 7 and mm <= 9:
        return 'summer'

def add_info(df):
    """
    Add day-of-year, month, year, and season columns.
    """
    df = (df
          .assign(doy=df.date.dt.dayofyear)
          .assign(month=df.date.dt.month)
          .assign(year=df.date.dt.year)
          )
    df['season'] = df.date.apply(season)
    return df
df = add_info(df)
gdf = df_to_gdf(df)
gdf = (gpd.sjoin(gdf, regions, op='within', how='left')
       .reset_index()
       .drop(columns=['index_right', 'index_left', 'index'], errors='ignore')
       )
df = pd.DataFrame(gdf).drop(columns=['geometry'])
dfp = dfp.merge(df, how='outer')
dfp = add_info(dfp)
df.to_csv('../data/no3-compilation/database-per-stn.csv', index=False)
dfp.to_csv('../data/no3-compilation/database.csv', index=False)
df.to_feather('../data/no3-compilation/database-per-stn.feather')
dfp.to_feather('../data/no3-compilation/database.feather')
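A minimal sketch (assuming the notebook's relative paths and a pandas installation with feather support) of how downstream notebooks can reload the compiled database:
# Minimal sketch: reload the compiled database elsewhere.
import pandas as pd

stations = pd.read_feather('../data/no3-compilation/database-per-stn.feather')  # per-station (surface) values and derived quantities
profiles = pd.read_feather('../data/no3-compilation/database.feather')          # full interpolated profiles
print(len(stations), 'stations,', len(profiles), 'profile records')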