Preamble

This notebook presents the nitrate database compilation. It is centered around Codispoti et al.'s 2013 compilation of historical nitrate measurements in the Arctic Ocean.

All figures exported from this notebook are prefixed with FIGURE_NO3-COMP_.

In [ ]:
%run imports.py
%load_ext autoreload
%autoreload 2

# Define dimensions
dims = dict(
    reg_name=hv.Dimension('reg_name', label='Region'),
    ntr0=hv.Dimension('ntr0', label='Surface NO₃ conc.', range=(0,13)),
    doy=hv.Dimension('doy', label='Day of the year')
)

Define individual datasets

Codispoti et al.

First, download the largest database of vertical profiles of nitrate concentrations in the Arctic known to us: the one compiled by Codispoti and collaborators and published in 2013: http://dx.doi.org/10.1016/j.pocean.2012.11.006

In [ ]:
!wget -c --read-timeout=5 -P ../data/no3-compilation/ "https://www.nodc.noaa.gov/archive/arc0034/0072133/1.1/data/0-data/Codispoti_Arctic_Nutrients_Submission_11-11-2010.csv"

def load_codispotietal():
    """
    Codispoti et al., 2013
    """
    renamedict = dict(
        NO3='nitrate', Sal='sal', T='temp', z='depth',
        Longitude='lon', Latitude='lat', Date='date',
        Station='station', Cruise='cruise',
    )

    df = (pd.read_csv('../data/no3-compilation/Codispoti_Arctic_Nutrients_Submission_11-11-2010.csv',
                      na_values=-999,
                      parse_dates=['Date'], dtype={'Station': str},
                     )[['Date','Latitude','Longitude','NO3','z','T','Sal','Station','Cruise']]
           .rename(columns=renamedict)
           .dropna(subset=['nitrate'])
          )
    # make station identifiers unique across cruises
    df = df.assign(station=lambda d: d.cruise+'+'+d.station.astype(str))
    df = df.drop(columns=['cruise'])
    # average replicate samples taken at the same station, date, and depth
    df = df.groupby(['station', 'date', 'depth']).mean().reset_index()

    df['p'] = gsw.p_from_z(-df.depth, df.lat)
    df['SA'] = gsw.SA_from_SP(df.sal, df.p, df.lon, df.lat)
    df['CT'] = gsw.CT_from_pt(df.SA, df.temp)
    df['sigth'] = gsw.sigma0(df.SA, df.CT)

    df = df.assign(database='codispoti')
    return df  

Canadian Arctic data

These data from the Canadian Arctic were collected mostly during ArcticNet and DFO cruises, among others, and are treated in more detail by Pierre Coupel et al., 2019 (in prep.).

In [ ]:
def load_canadian_arctic():
    fname = '../data/no3-compilation/NUT-ArcticNet_NOW_CATS_Coupel_reduced.csv'
    if os.path.exists(fname):
        df = (
            pd.read_csv(fname, dtype={'Station': str})
            .rename(
                columns=dict(
                    Station='station', Depth='depth', Longitude='lon', Latitude='lat', 
                    Nitrate='nitrate', Temperature='temp', Salinity='sal'
                )
            )
            .dropna(subset=['nitrate'])
        )

        # assume mid-month where the day of the month is missing
        df.Day = df.Day.fillna(15)
        df['date'] = pd.to_datetime(df.Year*10000+df.Month*100+df.Day, format='%Y%m%d')

        df = df.assign(station=lambda row: row.station.astype(str))

        df = df.groupby(['station', 'date', 'depth']).mean().reset_index()

        df['p'] = gsw.p_from_z(-df.depth, df.lat)
        df['SA'] = gsw.SA_from_SP(df.sal, df.p, df.lon, df.lat)
        df['CT'] = gsw.CT_from_pt(df.SA, df.temp)
        df['sigth'] = gsw.sigma0(df.SA, df.CT)

        df = df.drop(columns=['Year', 'Month', 'Day'])
        df = df.assign(database='arcticnet')
        # conform longitudes to interval -180, 180
        df['lon'] = df.lon.where(df.lon<=180, df.lon-360)

        return df
    else:
        print('Data not available locally')
        return pd.DataFrame(columns=[
            'station', 'date', 'depth', 'lon', 'lat', 
            'temp', 'sal', 'nitrate', 'p',
            'SA', 'CT', 'sigth', 'database'
        ])

Arrigo et al., 2017, SUBICE

Winter data are always hard to come by, so let's add some more pre-bloom nitrate concentrations from the Chukchi Sea, measured by Arrigo and collaborators and published at https://agupubs.onlinelibrary.wiley.com/doi/abs/10.1002/2017JG003881. These are downloaded from https://datadryad.org/stash/dataset/doi:10.5061/dryad.fm7b5

In [ ]:
!wget -c --read-timeout=5 -O ../data/no3-compilation/SUBICE_high_NO3_hy1.csv "https://datadryad.org/stash/downloads/file_stream/29860"

def load_arrigoetal():
    df = pd.read_csv('../data/no3-compilation/SUBICE_high_NO3_hy1.csv', 
                     na_values=-999, skiprows=9, header=[0,1],
                     dtype={'STNNBR': str},
                    ).replace(to_replace=-999, value=np.nan)
    df.columns = df.columns.droplevel(1)
    # keep only near-surface bottles (<= 10 m) and average them per station
    df = df.loc[df.BTLDPTH<=10].groupby('STNNBR').mean().reset_index()

    df.DATE = df.DATE.apply(lambda x: pd.to_datetime(str(int(x)), format='%Y%m%d'))

    renamedict = dict(
        DATE='date',LATITUDE='lat', LONGITUDE='lon', NITRAT='nitrate', DEPTH='depth', 
        STNNBR='station', CTDTMP='temp', CTDSAL='sal',
    )

    df = df.rename(columns=renamedict)
    
    df['p'] = gsw.p_from_z(-df.depth, df.lat)
    df['SA'] = gsw.SA_from_SP(df.sal, df.p, df.lon, df.lat)
    df['CT'] = gsw.CT_from_pt(df.SA, df.temp)
    df['sigth'] = gsw.sigma0(df.SA, df.CT)
    
    return df[list(renamedict.values())+['p', 'SA', 'CT', 'sigth']].assign(database='arcticnet')

Compile and postprocess data

Merge all nutrient databases

Check the combined data for duplicate profiles. First, granularize the longitudes and latitudes (i.e., bin them onto a coarse grid) for comparison, then retain only those Canadian Arctic profiles whose (date, longitude, latitude) bin does not already appear in the Codispoti et al. database.

In [ ]:
def get_timelatlon_bin(df):
    df['lonbin'] = pd.to_numeric(pd.cut(df.lon, np.arange(-180, 180, 0.1), labels=0.05+np.arange(-180, 180, 0.1)[:-1]))
    df['latbin'] = pd.to_numeric(pd.cut(df.lat, np.arange(60, 90, 0.05), labels=0.025+np.arange(60, 90, 0.05)[:-1]))

    # one representative entry for each profile
    return df.groupby(['date', 'lonbin', 'latbin']).first()

stns_canadian_arctic = get_timelatlon_bin(load_canadian_arctic())
stns_codispotietal = get_timelatlon_bin(load_codispotietal())

canadian_arctic_stations_to_merge = stns_canadian_arctic.station.loc[
    stns_canadian_arctic.index
    .drop(
        stns_codispotietal.index,
        errors='ignore',
    )
].values
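
As a quick sanity check (not part of the original workflow), we can count how many binned profiles occur in both databases; these are the Canadian Arctic profiles that will be excluded below:

In [ ]:
# (date, lonbin, latbin) bins present in both databases; the corresponding
# Canadian Arctic profiles are treated as duplicates and excluded below
overlap = stns_canadian_arctic.index.intersection(stns_codispotietal.index)
print(f'{len(overlap)} binned profiles overlap with the Codispoti et al. database')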

Now, we can select those data from the Canadian Arctic that will not duplicate any data that's already in the Codispoti et al. database.

In [ ]:
canadian_arctic = load_canadian_arctic()
canadian_arctic_to_merge = canadian_arctic.loc[canadian_arctic.station.isin(canadian_arctic_stations_to_merge)]

This means we drop the following number of records:

In [ ]:
len(canadian_arctic) - len(canadian_arctic_to_merge)
In [ ]:
dfl = [
    load_codispotietal(),
    load_arrigoetal(),
    canadian_arctic_to_merge
]

df = pd.concat(dfl, sort=False).reset_index()
df.to_pickle('../data/no3-compilation/tmp_compiled.pandas')

Station-wise depth interpolation [This step takes some time]

In [ ]:
df = pd.read_pickle('../data/no3-compilation/tmp_compiled.pandas')

def groupwise_interp(df):
    if df.depth.min()>20:
        return None
    else:
        bins = np.arange(-1,301.1,2)
        labels = bins[:-1]+np.diff(bins)/2
        df.depth = pd.to_numeric(pd.cut(df.depth, bins=bins, labels=labels))
        df = (df
              .dropna(subset=['depth'])
              .groupby('depth').mean()
              .reindex(index=labels)
             )
        return df.interpolate(method='linear', limit_area='inside').fillna(method='bfill').drop(
            columns=['station', 'date', 'depth'], errors='ignore')
    
df = (df.groupby(['database', 'station', 'date'])
      .apply(groupwise_interp)
      .reset_index()
     )

df.to_pickle('../data/no3-compilation/tmp_compiled-interpolated.pandas')

Derive per-profile quantities

In the following, we derive a number of quantities for each profile. Not all of them made it into the final article, but we keep them here for analyses that might come up in the future.

In [ ]:
df = pd.read_pickle('../data/no3-compilation/tmp_compiled-interpolated.pandas')
df = df.loc[df.database.isin(['codispoti', 'arcticnet'])]

MLD

In [ ]:
def find_ml(sigth, depth, delta_sigth_crit=0.1):
    """
    Find the mixed layer depth as the first depth where density exceeds
    the near-surface density by delta_sigth_crit (default 0.1 kg/m3).
    """
    index = np.where(sigth>np.nanmean(sigth.iloc[:5]) + delta_sigth_crit)[0]
    if len(index)>0:
        return depth.iloc[index[0]].astype(float)
    else:
        return np.nan

df = df.merge(pd.DataFrame(
              df.groupby(df.station).apply(lambda g: find_ml(g.sigth, g.depth)),
              columns=['mld']),on='station')

Nitracline

There are two relevant nitracline definitions:

  1. nc0: biologically meaningful; the first depth at which NO₃ exceeds a fixed threshold (here 1 µM), marking the base of the nitrate-depleted zone (if there is one)
  2. nc: physically meaningful; the first depth at which NO₃ exceeds the surface concentration by 1 µM, marking water mass transitions
In [ ]:
def find_nitracline0(no3, depth, no3crit=1.0):
    index = np.where(no3>=no3crit)[0]
    if len(index)>0:
        return depth.iloc[index[0]].astype(float)
    else:
        return np.nan
    
def find_nitracline(no3, depth):
    no3sfc = no3.loc[depth<=10].mean()
    index = np.where(no3>no3sfc+1.)[0]
    if len(index)>0:
        return depth.iloc[index[0]].astype(float)
    else:
        return np.nan
    
gb = df.groupby(df.station)
df = df.merge(pd.DataFrame(
    dict(nc0=gb.apply(lambda g: find_nitracline0(g.nitrate, g.depth, 1)),
         nc =gb.apply(lambda g: find_nitracline(g.nitrate, g.depth))
        )),on='station')

Stratification

In [ ]:
# density difference between ~40 m (35-45 m average) and ~100 m (95-105 m average)

sigth0 = df.loc[df.depth.between(35, 45)].groupby('station').mean().sigth
sigth_deep = df.loc[df.depth.between(95, 105)].groupby('station').mean().sigth

df = df.merge(pd.DataFrame(
    dict(delta_sigth=(sigth_deep-sigth0).values, station=sigth0.index.values)
))
In [ ]:
def buoyancy_freq(sigma, depth, from_depth, layer_thickness=30):
    """
    Calculate buoyancy frequency
    over depth range [from_depth, from_depth+layer_thickness].
    """
    nc_depth_range = (from_depth<=depth) & (depth<=from_depth+layer_thickness)
    if not np.any(nc_depth_range):
        return np.nan
    else:
        def slope(x, y, x_range):
            # sort densities ascending so the fitted gradient corresponds
            # to a statically stable (monotonic) profile
            return np.polyfit(x.loc[x_range], y.loc[x_range].sort_values(), 1)[0]

        return 9.81/(1e3+np.mean(sigma)) * slope(depth, sigma, nc_depth_range)

gb = df.groupby(df.station)
df = df.merge(pd.DataFrame(
    dict(
        N2_30_60=gb.apply(lambda g: buoyancy_freq(g.sigth, g.depth, g.mld, 30))
    )),
    on='station')

Save database

In [ ]:
# dfp keeps the full interpolated profiles; df is reduced to the shallowest
# depth bin of each profile, whose nitrate value becomes the surface
# concentration ntr0
dfp = df.copy()
df = (df
       .loc[df.depth==df.depth.min()]
       .rename(columns=dict(nitrate='ntr0'))
      )

dfp = dfp.merge(df[['station', 'ntr0']], how='outer')
In [ ]:
df.to_pickle('../data/no3-compilation/tmp_compiled-interpolated-derived-per-station.pandas')
dfp.to_pickle('../data/no3-compilation/tmp_compiled-interpolated-derived.pandas')

Add regions

Based on Peralta-Ferriz & Woodgate and Codispoti et al.

Define regions

In [ ]:
d = [
    ('Chukchi Sea', smoothen(box(-180, 68, -155, 76))), 
    ('Southern Beaufort', smoothen(box(-155, 68, -115, 72))), 
    ('Canada Basin', smoothen(box(-155, 72, -130, 84))), 
    ('Makarov Basin', smoothen(box(-180, 83.5, -50, 90)).union(smoothen(box(140, 78, 180, 90)))), 
    ('Eurasian Basin', smoothen(box(-30, 82, 140, 90).union(box(110, 78, 140, 82)))), 
    ('Barents Sea', smoothen(box(15, 75, 60, 80).union(box(15, 70.5, 55, 75)))),
    ('Baffin Bay', smoothen(box(-65, 66, -45, 78))),
    ('Canadian Archipelago', smoothen(box(-110, 66, -65, 80))), #.union(box(-100, 78, -50, 82))))
    ('Fram Strait (East)', smoothen(box(-5, 75, 15, 81)))
]

names, geo = zip(*d)

regions = gpd.GeoDataFrame(dict(reg_name=list(names)), geometry=list(geo))
regions.crs = from_epsg(4326)

# regions = regions.to_crs(from_epsg(3413))

regions['reg_idx'] = range(len(d))

regions.to_file('../data/regions/arctic-regions.shp')
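
The smoothen helper is defined in imports.py and not shown here. A minimal sketch of what it might look like, assuming it simply densifies the straight lon/lat edges of each box so that they follow smooth curves once reprojected to the polar stereographic projection (the function name and sampling step are illustrative, not the original implementation):

In [ ]:
import numpy as np
from shapely.geometry import Polygon

def smoothen_sketch(poly, n_points=200):
    """Densify a polygon's exterior by sampling points along its boundary,
    so straight lon/lat edges curve nicely under reprojection
    (a guess at what smoothen from imports.py does)."""
    exterior = poly.exterior
    distances = np.linspace(0, exterior.length, n_points)
    return Polygon([exterior.interpolate(d).coords[0] for d in distances])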

... and visualize them:

In [ ]:
fname = '../nb_fig/FIGURE_NO3-COMP_regions'

hv.extension('bokeh')

regions = gpd.read_file('../data/regions/arctic-regions.shp')
options_bk = [
    opts.Overlay(legend_position='left'),
    opts.Polygons(projection=ccrs.NorthPolarStereo(), cmap='Category10', tools=['hover'], 
                  show_legend=True, 
                  frame_width=500, aspect='equal',
                  line_color=None, alpha=.7,
                 ),
]
add_backend_to_opts(options_bk, 'bokeh')

poly = gv.Polygons(regions, kdims=['Longitude', 'Latitude'], vdims=['reg_name', 'reg_idx'])
l = poly * gf.land * gf.coastline * isobath2000.relabel('2000 m isobath')
l = l.opts(*options_bk)

hv.save(l.opts(toolbar=None), fname, fmt='html')
In [ ]:
# manually fix matplotlib legend handler so we can have a holoviews Polygons legend

from matplotlib.collections import PatchCollection
from matplotlib.legend_handler import HandlerPolyCollection
from matplotlib.legend import Legend

Legend.update_default_handler_map({PatchCollection: HandlerPolyCollection()})

hv.extension('matplotlib')
regions = gpd.read_file('../data/regions/arctic-regions.shp')
In [ ]:
options_mpl = [
    opts.Polygons(
        projection=ccrs.NorthPolarStereo(), 
        show_legend=True, color=None,
    ),
    opts.NdOverlay(show_legend=True),
]
add_backend_to_opts(options_mpl, 'matplotlib')

poly = hv.NdOverlay({
    d.reg_name: gv.Polygons(gpd.GeoDataFrame(d.to_frame().transpose()))
    for k, d in regions.iterrows()
})

l = poly * gf.land * gf.coastline
l = l.opts(*options_mpl).redim.range(Latitude=(60,90), Longitude=(-180, 180))

# render to matplotlib figure
fig = hv.render(l)

# do final adjustments directly in matplotlib
fig.set_size_inches(9, 6)

ax = fig.axes[0]
circumpolar_axis(ax)
ax.legend_.set_bbox_to_anchor((1, 1))
ax.text(0, 0.99, 'B', transform=ax.transAxes, fontdict={'size':20, 'weight': 'bold'})

for fmt in ['.png', '.pdf']:
    fig.savefig(fname+fmt)
    
fig
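
The circumpolar_axis helper also comes from imports.py. A rough sketch of what it may do, assuming it clips the cartopy axis to the usual circular boundary and adds gridlines (the actual implementation may differ):

In [ ]:
import numpy as np
import matplotlib.path as mpath

def circumpolar_axis_sketch(ax):
    """Clip a cartopy NorthPolarStereo axis to a circular boundary and add
    gridlines -- an assumption about what circumpolar_axis does."""
    theta = np.linspace(0, 2*np.pi, 200)
    circle = mpath.Path(np.column_stack([np.sin(theta), np.cos(theta)]) * 0.5 + 0.5)
    ax.set_boundary(circle, transform=ax.transAxes)
    ax.gridlines(linewidth=0.5)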

Add ancillary vars and regionalize dataframe

In [ ]:
df = pd.read_pickle('../data/no3-compilation/tmp_compiled-interpolated-derived-per-station.pandas')
dfp = pd.read_pickle('../data/no3-compilation/tmp_compiled-interpolated-derived.pandas')

def season(timestamp):
    # Jan-Apr -> 'winter', Jul-Sep -> 'summer'; other months get no season label
    mm = timestamp.month
    if mm<=4:
        return 'winter'
    elif mm>=7 and mm<=9:
        return 'summer'
    
def add_info(df):
    df = (df
          .assign(doy=df.date.dt.dayofyear)
          .assign(month=df.date.dt.month)
          .assign(year=df.date.dt.year)
     )
    df['season'] = df.date.apply(season)
    return df

df = add_info(df)

gdf = df_to_gdf(df)
gdf = (gpd.sjoin(gdf, regions, op='within', how='left')
        .reset_index()
        .drop(columns=['index_right', 'index_left', 'index'], errors='ignore')
       )
df = pd.DataFrame(gdf).drop(columns=['geometry'])

dfp = dfp.merge(df, how='outer')
dfp = add_info(dfp)

df.to_csv('../data/no3-compilation/database-per-stn.csv', index=False)
dfp.to_csv('../data/no3-compilation/database.csv', index=False)
df.to_feather('../data/no3-compilation/database-per-stn.feather')
dfp.to_feather('../data/no3-compilation/database.feather')
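
The df_to_gdf helper used above also lives in imports.py. A minimal sketch, assuming it simply wraps the lon/lat columns into point geometries so that the spatial join with the regions can be performed (the name df_to_gdf_sketch marks it as an illustration, not the original code):

In [ ]:
import geopandas as gpd
from shapely.geometry import Point

def df_to_gdf_sketch(df):
    """Convert a DataFrame with 'lon'/'lat' columns into a GeoDataFrame of
    points in EPSG:4326 -- an assumption about what df_to_gdf does."""
    geometry = [Point(xy) for xy in zip(df.lon, df.lat)]
    return gpd.GeoDataFrame(df.copy(), geometry=geometry, crs='EPSG:4326')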