Module AqEquil.AqSpeciation
Expand source code
DEBUGGING_R = True
import os
import re
import sys
import shutil
import copy
import collections
import dill
import math
import warnings
import subprocess
import pkg_resources
import pandas as pd
import numpy as np
from chemparse import parse_formula
from IPython.core.display import display, HTML
# matplotlib for static plots
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import plotly.express as px
import plotly.io as pio
from plotly.subplots import make_subplots
import plotly.graph_objects as go
# rpy2 for Python and R integration
import rpy2.rinterface_lib.callbacks
import logging
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR) # will display errors, but not warnings
import rpy2.robjects as ro
from rpy2.robjects import pandas2ri
pandas2ri.activate()
class Error_Handler:
"""
Handles how errors are printed in Jupyter notebooks. By default, errors that
are handled by AqEquil are printed with an error message, but no traceback.
Errors that are not handled by AqEquil, such as those thrown if the user
encounters a bug, will display a full traceback.
If the error handler prints an error message without traceback, all future
errors regardless of origin will be shown without traceback until the
notebook kernel is restarted.
Attributes
----------
clean : bool
Report exceptions without traceback? If True, only the error message is
shown. If False, the entire error message, including traceback, is
shown. Ignored if AqEquil is not being run in a Jupyter notebook.
"""
def __init__(self, clean=True):
self.clean = clean # bool: hide traceback?
pass
@staticmethod
def hide_traceback(exc_tuple=None, filename=None, tb_offset=None,
exception_only=False, running_compiled_code=False):
"""
Return a modified ipython showtraceback function that does not display
traceback when encountering an error.
"""
ipython = get_ipython()
etype, value, tb = sys.exc_info()
value.__cause__ = None # suppress chained exceptions
return ipython._showtraceback(etype, value, ipython.InteractiveTB.get_exception_only(etype, value))
def raise_exception(self, msg):
"""
Raise an exception that displays the error message without traceback. This
happens only when the exception is predicted by the AqEquil package
(e.g., for common user errors).
"""
if self.clean and isnotebook():
ipython = get_ipython()
ipython.showtraceback = self.hide_traceback
raise Exception(msg)
def load(filename, messages=True, hide_traceback=True):
"""
Load a speciation file.
Parameters
----------
filename : str
Name of the speciation file.
Returns
----------
An object of class `Speciation`.
"""
err_handler = Error_Handler(clean=hide_traceback)
if len(filename) <= 12:
print("Attempting to load "+str(filename)+".speciation ...")
filename = filename+".speciation"
if 'speciation' in filename[-11:]:
if os.path.exists(filename) and os.path.isfile(filename):
pass
else:
err = "Cannot locate input file {}/{}".format(os.getcwd(), filename)
err_handler.raise_exception(err)
else:
err = ("Input file {}".format(filename) + " "
"must be in {} format.".format(ext_dict[ext]))
err_handler.raise_exception(err)
if os.path.getsize(filename) > 0:
with open(filename, 'rb') as handle:
speciation = dill.load(handle)
if messages:
print("Loaded '{}'".format(filename))
return speciation
else:
msg = "Cannot open " + str(filename) + " because the file is empty."
err_handler.raise_exception(msg)
def isnotebook():
"""
Check if this code is running in a Jupyter notebook
"""
try:
shell = get_ipython().__class__.__name__
if shell == 'ZMQInteractiveShell':
return True # Jupyter notebook or qtconsole
elif shell == 'TerminalInteractiveShell':
return False # Terminal running IPython
else:
return False # Other type (?)
except NameError:
return False # Probably standard Python interpreter
def float_to_fraction (x, error=0.000001):
"""
Convert a float into a fraction. Works with floats like 2.66666666.
Solution from https://stackoverflow.com/a/5128558/8406195
"""
n = int(math.floor(x))
x -= n
if x < error:
return (n, 1)
elif 1 - error < x:
return (n+1, 1)
# The lower fraction is 0/1
lower_n = 0
lower_d = 1
# The upper fraction is 1/1
upper_n = 1
upper_d = 1
while True:
# The middle fraction is (lower_n + upper_n) / (lower_d + upper_d)
middle_n = lower_n + upper_n
middle_d = lower_d + upper_d
# If x + error < middle
if middle_d * (x + error) < middle_n:
# middle is our new upper
upper_n = middle_n
upper_d = middle_d
# Else If middle < x - error
elif middle_n < (x - error) * middle_d:
# middle is our new lower
lower_n = middle_n
lower_d = middle_d
# Else middle is our best fraction
else:
return (n * middle_d + middle_n, middle_d)
def float_to_formatted_fraction(x, error=0.000001):
"""
Format a fraction for html.
"""
f = float_to_fraction(x, error=error)
whole_number_float = int((f[0]-(f[0]%f[1]))/f[1])
remainder_tuple = (f[0]%f[1], f[1])
if remainder_tuple[0] == 0:
return str(whole_number_float)
else:
if whole_number_float == 0:
whole_number_float = ""
return "{0}<sup>{1}</sup>⁄<sub>{2}</sub>".format(whole_number_float, remainder_tuple[0], remainder_tuple[1])
def format_coeff(coeff):
"""
Format a reaction coefficient for html.
"""
if coeff == 1 or coeff == -1:
coeff = ""
elif coeff.is_integer() and coeff < 0:
coeff = str(-int(coeff))
elif coeff.is_integer() and coeff > 0:
coeff = str(int(coeff))
else:
if coeff < 0:
coeff = float_to_formatted_fraction(-coeff)
else:
coeff = float_to_formatted_fraction(coeff)
if coeff != "":
coeff = coeff + " "
return coeff
def flatten_list(_2d_list):
"""
Flatten a list of lists.
Code from: https://stackabuse.com/python-how-to-flatten-list-of-lists/
"""
flat_list = []
# Iterate through the outer list
for element in _2d_list:
if type(element) is list:
# If the element is of type list, iterate through the sublist
for item in element:
flat_list.append(item)
else:
flat_list.append(element)
return flat_list
def convert_to_RVector(value, force_Rvec=True):
"""
Convert a value or list into an R vector of the appropriate type.
Parameters
----------
value : numeric or str, or list of numeric or str
Value to be converted.
force_Rvec : bool, default True
If `value` is not a list, force conversion into a R vector?
False will return an int, float, or str if value is non-list.
True will always return an R vector.
Returns
-------
int, float, str, or an rpy2 R vector
A value or R vector of an appropriate data type.
"""
if not isinstance(value, list) and not force_Rvec:
return value
elif not isinstance(value, list) and force_Rvec:
value = [value]
else:
pass
if all(isinstance(x, bool) for x in value):
return ro.BoolVector(value)
elif all(isinstance(x, int) for x in value):
return ro.IntVector(value)
elif all(isinstance(x, float) or isinstance(x, int) for x in value):
return ro.FloatVector(value)
else:
return ro.StrVector([str(v) for v in value])
def get_colors(colormap, ncol, alpha=1.0, hide_traceback=True):
"""
Get a list of rgb values for a matplotlib colormap
Parameters
----------
colormap : str, default "WORM"
Name of the colormap to color the scatterpoints. Accepts "WORM",
"colorblind", or matplotlib colormaps.
See https://matplotlib.org/stable/tutorials/colors/colormaps.html
The "colorblind" colormap is referenced from Wong, B. Points of view:
Color blindness. Nat Methods 8, 441 (2011).
https://doi.org/10.1038/nmeth.1618
ncol : int
Number of colors to return in the list.
alpha : float, default 1.0
An alpha value between 0.0 (transparent) and 1.0 (opaque).
Returns
-------
colors : list
A list of rgb color tuples
"""
err_handler = Error_Handler(clean=hide_traceback)
qualitative_cmaps = ['Pastel1', 'Pastel2', 'Paired', 'Accent',
'Dark2', 'Set1', 'Set2', 'Set3',
'tab10', 'tab20', 'tab20b', 'tab20c']
if colormap == "colorblind":
# colors from Wong B. 2011, https://doi.org/10.1038/nmeth.1618
colors = [(0, 0, 0, alpha), # black
(230/255, 159/255, 0, alpha), # orange
(86/255, 180/255, 233/255, alpha), # sky blue
(0, 158/255, 115/255, alpha), # bluish green
(240/255, 228/255, 66/255, alpha), # yellow
(0, 114/255, 178/255, alpha), # blue
(213/255, 94/255, 0, alpha), # vermillion
(204/255, 121/255, 167/255, alpha)] # reddish purple
if ncol <= len(colors):
return colors[:ncol]
else:
print("Switching from 'colorblind' colormap to 'viridis' because there are {} variables to plot.".format(ncol))
colormap = "viridis"
elif colormap == "WORM":
colors = [(0, 0, 0, alpha), # black
(22/255, 153/255, 211/255, alpha), # blue
(232/255, 86/255, 66/255, alpha), # red
(245/255, 171/255, 80/255, alpha), # orange
(115/255, 108/255, 168/255, alpha), # purple
(151/255, 208/255, 119/255, alpha), # green
(47/255, 91/255, 124/255, alpha), # dark blue
(119/255, 119/255, 119/255, alpha)] # gray
if ncol <= len(colors):
return colors[:ncol]
else:
print("Switching from 'WORM' colormap to 'viridis' because there are {} variables to plot.".format(ncol))
colormap = "viridis"
if colormap in qualitative_cmaps:
# handle qualitative (non-continuous) colormaps
colors = [plt.cm.__getattribute__(colormap).colors[i] for i in range(ncol)]
colors = [(c[0], c[1], c[2], alpha) for c in colors]
else:
# handle sequential (continuous) colormaps
norm = matplotlib.colors.Normalize(vmin=0, vmax=ncol-1)
try:
cmap = cm.__getattribute__(colormap)
except:
valid_colormaps = [cmap for cmap in dir(cm) if "_" not in cmap and cmap not in ["LUTSIZE", "MutableMapping", "ScalarMappable", "functools", "datad", "revcmap"]]
err_handler.raise_exception("'{}'".format(colormap)+" is not a recognized matplotlib colormap. "
"Try one of these: {}".format(valid_colormaps))
m = cm.ScalarMappable(norm=norm, cmap=cmap)
colors = [m.to_rgba(i) for i in range(ncol)]
colors = [(c[0], c[1], c[2], alpha) for c in colors]
return colors
def html_chemname_format_AqEquil(name, charge_sign_at_end=False):
"""
AqEquil-specific formatting of chemical names for html. Takes "_(input)"
into account when formatting names.
Parameters
----------
name : str
A chemical formula.
Returns
-------
A formatted chemical formula string.
"""
# format only the first part of the name if it has "_(input)"
if len(name.split("_(input)"))==2:
if name.split("_(input)")[1] == '':
name = name.split("_(input)")[0]
input_flag=True
else:
input_flag = False
name = html_chemname_format(name, charge_sign_at_end=charge_sign_at_end)
# add " (input)" to the end of the name
if input_flag:
name = name+" (input)"
return(name)
def html_chemname_format(name, charge_sign_at_end=False):
"""
Format a chemical formula to display subscripts and superscripts in HTML
(e.g., Plotly plots)
Example, "CH3COO-" becomes "CH<sub>3</sub>COO<sup>-</sup>"
Parameters
----------
name : str
A chemical formula.
charge_sign_at_end : bool, default False
Display charge with sign after the number (e.g. SO4 2-)?
Returns
-------
A formatted chemical formula string.
"""
p = re.compile(r'(?P<sp>[-+]\d*?$)')
name = p.sub(r'<sup>\g<sp></sup>', name)
charge = re.search(r'<.*$', name)
name_no_charge = re.match(r'(?:(?!<|$).)*', name).group(0)
mapping = {"0": "<sub>0</sub>", "1": "<sub>1</sub>", "2": "<sub>2</sub>", "3": "<sub>3</sub>", "4": "<sub>4</sub>",
"5": "<sub>5</sub>", "6": "<sub>6</sub>", "7": "<sub>7</sub>", "8": "<sub>8</sub>", "9": "<sub>9</sub>",
".":"<sub>.</sub>"}
name_no_charge_formatted = "".join([mapping.get(x) or x for x in list(name_no_charge)])
if charge != None:
name = name_no_charge_formatted + charge.group(0)
else:
name = name_no_charge_formatted
if charge_sign_at_end:
if "<sup>-" in name:
name = name.replace("<sup>-", "<sup>")
name = name.replace("</sup>", "-</sup>")
if "<sup>+" in name:
name = name.replace("<sup>+", "<sup>")
name = name.replace("</sup>", "+</sup>")
return(name)
class Speciation(object):
"""
Stores the output of a speciation calculation.
Attributes
----------
input : pd.DataFrame
Pandas dataframe containing user-supplied sample chemistry data.
mass_contribution : pd.DataFrame
Pandas dataframe containing basis species contributions to mass balance
of aqueous species.
batch_3o : rpy2 ListVector
An rpy2 ListVector (R object) containing speciation results, in case
analysis in R is preferred.
report : pd.DataFrame
Pandas dataframe reporting major results of speciation calculation in
across all samples.
report_divs : rpy2 ListVector
An rpy2 ListVector of column names within the different sections of the
speciation report.
sample_data : dict
Dictionary with sample names as keys and speciation results as values.
"""
def __init__(self, args, hide_traceback=True):
self.err_handler = Error_Handler(clean=hide_traceback)
self.reactions_for_plotting = None # stores formatted reactions for plotting results of affinity and energy supply calculations
for k in args:
setattr(self, k, args[k])
def __getitem__(self, item):
return getattr(self, item)
@staticmethod
def __unique(seq):
"""
Provide a sequence, get a list of non-repeating elements in the same order.
"""
seen = set()
seen_add = seen.add
return [x for x in seq if not (x in seen or seen_add(x))]
def save(self, filename, messages=True):
"""
Save the speciation as a '.speciation' file to your current working
directory. This file can be loaded with `AqEquil.load(filename)`.
Parameters
----------
filename : str
The desired name of the file.
messages : str
Print a message confirming the save?
"""
if filename[-11:] != '.speciation':
filename = filename + '.speciation'
with open(filename, 'wb') as handle:
dill.dump(self, handle, protocol=dill.HIGHEST_PROTOCOL)
if messages:
print("Saved as '{}'".format(filename))
@staticmethod
def __save_figure(fig, save_as, save_format, save_scale, plot_width, plot_height, ppi):
if isinstance(save_format, str) and save_format not in ['png', 'jpg', 'jpeg', 'webp', 'svg', 'pdf', 'eps', 'json', 'html']:
self.err_handler.raise_exception("{}".format(save_format)+" is an unrecognized "
"save format. Supported formats include 'png', "
"'jpg', 'jpeg', 'webp', 'svg', 'pdf', 'eps', "
"'json', or 'html'")
if isinstance(save_format, str):
if not isinstance(save_as, str):
save_as = "newplot"
if save_format=="html":
fig.write_html(save_as+".html")
print("Saved figure as {}".format(save_as)+".html")
save_format = 'png'
elif save_format in ['pdf', 'eps', 'json']:
pio.write_image(fig, save_as+"."+save_format, format=save_format, scale=save_scale,
width=plot_width*ppi, height=plot_height*ppi)
print("Saved figure as {}".format(save_as)+"."+save_format)
save_format = "png"
else:
pio.write_image(fig, save_as+"."+save_format, format=save_format, scale=save_scale,
width=plot_width*ppi, height=plot_height*ppi)
print("Saved figure as {}".format(save_as)+"."+save_format)
else:
save_format = "png"
return save_as, save_format
@staticmethod
def __get_unit_info(subheader):
unit_name_dict = {
"pH" : ("", "pH"),
"ppm" : ("", "ppm"),
"ppb" : ("", "ppb"),
"mg/L" : ("", "mg/L"),
"degC" : ("temperature", "°C"),
"log_molality" : ("log molality", "log(mol/kg)"),
"Molality" : ("molality", "mol/kg"),
"molality" : ("molality", "mol/kg"),
"molal" : ("molality", "mol/kg"),
"log_activity" : ("log activity", ""),
"Log activity" : ("log activity", ""),
"mg/kg.sol" : ("", "mg solute per kg solution"),
"Alk., eq/kg.H2O" : ("alkalinity", "eq/kg"),
"Alk., eq/L" : ("alkalinity", "eq/L"),
"Alk., eq/kg.sol" : ("alkalinity", "eq/kg solution"),
"Alk., mg/L CaCO3" : ("alkalinity", "mg/L CaCO3"),
"Alk., mg/L HCO3-" : ("alkalinity", "mg/L HCO3-"),
"pX" : ("-(log activity)", "-log(mol/kg)"),
"activity" : ("activity", ""),
"log_gamma" : ("log gamma", "log(kg/mol)"),
"gamma" : ("gamma", "kg/mol"),
"affinity_kcal" : ("affinity", "kcal/mol"),
"%" : ("", "%"),
"Eh_volts" : ("Eh", "volts"),
"eq/kg.H2O" : ("charge", "eq/kg"),
"logfO2" : ("", ""),
"cal/mol e-" : ("affinity", "cal/mol e-"),
"cal/kg.H2O" : ("energy supply", "cal/kg H2O"),
"Log ion-H+ activity ratio" : ("Log ion-H+ activity ratio", ""),
"log_fugacity" : ("log fugacity", "log(bar)"),
"fugacity" : ("fugacity", "bar"),
"bar" : ("", "bar"),
}
out = unit_name_dict.get(subheader)
return out[0], out[1]
def lookup(self, col=None):
"""
Look up desired columns in the speciation report.
Parameters
----------
col : str or list of str
Leave blank to get a list of section names in the report:
```speciation.lookup()```
Provide the name of a section to look up the names of columns in
that section of the report:
```speciation.lookup("aq_distribution")```
Provide a column name (or a list of column names) to retrieve the
column from the report:
```speciation.lookup(["Temperature", "O2"])```
Returns
----------
Pandas dataframe or list of str
If a column name (or list of column names) is provided, returns the
speciation report with only the desired column(s). Otherwise returns
a list of section names (if no arguments are provided), or a list of
columns in a section (if a section name is provided).
"""
names_length = len(self.report_divs.names)
if col==None and names_length>0:
return list(self.report_divs.names)
if names_length>0:
if col in list(self.report_divs.names):
return list(self.report_divs.rx2(col))
if isinstance(col, str):
col = [col]
return self.report.iloc[:, self.report.columns.get_level_values(0).isin(set(col))]
def __convert_aq_units_to_log_friendly(self, species, rows):
col_data = self.lookup(species)
col_data = col_data.loc[rows]
if col_data.columns.get_level_values(1) == 'log_activity':
y = [10**float(s[0]) if s[0] != 'NA' else float("nan") for s in col_data.values.tolist()]
out_unit = 'activity'
elif col_data.columns.get_level_values(1) == 'log_molality':
y = [10**float(s[0]) if s[0] != 'NA' else float("nan") for s in col_data.values.tolist()]
out_unit = 'molality'
elif col_data.columns.get_level_values(1) == 'log_gamma':
y = [10**float(s[0]) if s[0] != 'NA' else float("nan") for s in col_data.values.tolist()]
out_unit = 'gamma'
elif col_data.columns.get_level_values(1) == 'log_fugacity':
y = [10**float(s[0]) if s[0] != 'NA' else float("nan") for s in col_data.values.tolist()]
out_unit = 'fugacity'
else:
y = [float(s[0]) if s[0] != 'NA' else float("nan") for s in col_data.values.tolist()]
out_unit = col_data.columns.get_level_values(1)[0]
return y, out_unit
def plot_mineral_saturation(self, sample_name, title=None,
mineral_sat_type="affinity",
plot_width=4, plot_height=3, ppi=122,
colors=["blue", "orange"],
save_as=None, save_format=None, save_scale=1,
interactive=True, hide_traceback=True):
"""
Vizualize mineral saturation states in a sample as a bar plot.
Parameters
----------
sample_name : str
Name of the sample to plot.
title : str, optional
Title of the plot.
mineral_sat_type : str, default "affinity"
Metric for mineral saturation state to plot. Can be "affinity" or
"logQoverK".
colors : list of two str, default ["blue", "orange"]
Sets the color of the bars representing supersaturated
and undersaturated states, respectively.
save_as : str, optional
Provide a filename to save this figure. Filetype of saved figure is
determined by `save_format`.
Note: interactive plots can be saved by clicking the 'Download plot'
button in the plot's toolbar.
save_format : str, default "png"
Desired format of saved or downloaded figure. Can be 'png', 'jpg',
'jpeg', 'webp', 'svg', 'pdf', 'eps', 'json', or 'html'. If 'html',
an interactive plot will be saved. Only 'png', 'svg', 'jpeg',
and 'webp' can be downloaded with the 'download as' button in the
toolbar of an interactive plot.
save_scale : numeric, default 1
Multiply title/legend/axis/canvas sizes by this factor when saving
the figure.
interactive : bool, default True
Return an interactive plot if True or a static plot if False.
"""
if sample_name not in self.report.index:
msg = ("Could not find '{}'".format(sample_name)+" among sample "
"names in the speciation report. Sample names include "
"{}".format(list(self.report.index)))
err_handler.raise_exception(msg)
if isinstance(self.sample_data[sample_name].get('mineral_sat', None), pd.DataFrame):
mineral_data = self.sample_data[sample_name]['mineral_sat'][mineral_sat_type].astype(float).sort_values(ascending=False)
x = mineral_data.index
else:
msg = ("This sample does not have mineral saturation state data."
"To generate this data, ensure get_mineral_sat=True when "
"running speciate(), or ensure this sample has "
"mineral-forming basis species.")
self.err_handler.raise_exception(msg)
color_list = [colors[0] if m >= 0 else colors[1] for m in mineral_data]
if mineral_sat_type == "affinity":
ylabel = 'affinity, kcal/mol'
if mineral_sat_type == "logQoverK":
ylabel = 'logQ/K'
if title==None:
title = sample_name + " mineral saturation index"
df = pd.DataFrame(mineral_data)
fig = px.bar(df, x=df.index, y="affinity",
height=plot_height*ppi, width=plot_width*ppi,
labels={'affinity': ylabel}, template="simple_white")
fig.update_traces(hovertemplate = "%{x} <br>"+ylabel+": %{y}",
marker_color=color_list)
fig.update_layout(xaxis_tickangle=-45, xaxis_title=None,
title={'text':title, 'x':0.5, 'xanchor':'center'},
margin={"t":40},
xaxis={'fixedrange':True},
yaxis={'fixedrange':True, 'exponentformat':'power'})
save_as, save_format = self.__save_figure(fig, save_as, save_format,
save_scale, plot_width,
plot_height, ppi)
config = {'displaylogo': False,
'modeBarButtonsToRemove': ['zoom2d', 'pan2d', 'select2d',
'lasso2d', 'zoomIn2d', 'zoomOut2d',
'autoScale2d', 'resetScale2d',
'toggleSpikelines'],
'toImageButtonOptions': {
'format': save_format, # one of png, svg, jpeg, webp
'filename': save_as,
'height': plot_height*ppi,
'width': plot_width*ppi,
'scale': save_scale,
},
}
if not interactive:
config['staticPlot'] = True
fig.show(config=config)
def barplot(self, y, title=None, convert_log=True, show_missing=True,
plot_width=4, plot_height=3, ppi=122, colormap="WORM",
save_as=None, save_format=None, save_scale=1,
interactive=True):
"""
Show a bar plot to vizualize one or more variables across all samples.
Parameters
----------
y : str or list of str
Name (or list of names) of the variables to plot. Valid variables
are columns in the speciation report.
title : str, optional
Title of the plot.
convert_log : bool, default True
Convert units "log_activity", "log_molality", "log_gamma", and
"log_fugacity" to "activity", "molality", "gamma", and "fugacity",
respectively?
show_missing : bool, default True
Show samples that do not have bars?
plot_width, plot_height : numeric, default 4 by 3
Width and height of the plot, in inches.
ppi : numeric, default 122
Pixels per inch. Along with `plot_width` and `plot_height`,
determines the size of interactive plots.
colormap : str, default "WORM"
Name of the colormap to color the scatterpoints. Accepts "WORM",
"colorblind", or matplotlib colormaps.
See https://matplotlib.org/stable/tutorials/colors/colormaps.html
The "colorblind" colormap is referenced from Wong, B. Points of view:
Color blindness. Nat Methods 8, 441 (2011).
https://doi.org/10.1038/nmeth.1618
save_as : str, optional
Provide a filename to save this figure. Filetype of saved figure is
determined by `save_format`.
Note: interactive plots can be saved by clicking the 'Download plot'
button in the plot's toolbar.
save_format : str, default "png"
Desired format of saved or downloaded figure. Can be 'png', 'jpg',
'jpeg', 'webp', 'svg', 'pdf', 'eps', 'json', or 'html'. If 'html',
an interactive plot will be saved. Only 'png', 'svg', 'jpeg',
and 'webp' can be downloaded with the 'download as' button in the
toolbar of an interactive plot.
save_scale : numeric, default 1
Multiply title/legend/axis/canvas sizes by this factor when saving
the figure.
interactive : bool, default True
Return an interactive plot if True or a static plot if False.
"""
if not isinstance(y, list):
y = [y]
colors = get_colors(colormap, len(y))
# convert rgba to hex
colors = [matplotlib.colors.rgb2hex(c) for c in colors]
# map each species to its color, e.g.,
# {'CO2': '#000000', 'HCO3-': '#1699d3', 'Other': '#736ca8'}
dict_species_color = {sp:color for sp,color in zip(y, colors)}
# html format color dict key names
dict_species_color = {html_chemname_format_AqEquil(k):v for k,v in dict_species_color.items()}
y_cols = self.lookup(y)
if not show_missing:
y_cols = y_cols.dropna(how='all') # this df will keep subheaders
x = y_cols.index # names of samples
df = self.lookup(["name"]+y).copy()
if not show_missing:
df = df.dropna(how='all') # this df will lose subheaders (flattened)
df.loc[:, "name"] = df.index
df.columns = df.columns.get_level_values(0)
for i, yi in enumerate(y):
y_col = y_cols.iloc[:, y_cols.columns.get_level_values(0)==yi]
try:
subheader = y_col.columns.get_level_values(1)[0]
except:
msg = ("Could not find '{}' ".format(yi)+"in the speciation "
"report. Available variables include "
"{}".format(list(set(self.report.columns.get_level_values(0)))))
self.err_handler.raise_exception(msg)
try:
unit_type, unit = self.__get_unit_info(subheader)
except:
unit_type = ""
unit = ""
try:
y_vals = [float(y0[0]) if y0[0] != 'NA' else float("nan") for y0 in y_col.values.tolist()]
except:
msg = ("One or more the values belonging to "
"'{}' are non-numeric and cannot be plotted.".format(y_col.columns.get_level_values(0)[0]))
self.err_handler.raise_exception(msg)
if convert_log and [abs(y0) for y0 in y_vals] != y_vals: # convert to bar-friendly units if possible
if subheader in ["log_activity", "log_molality", "log_gamma", "log_fugacity"]:
y_plot, out_unit = self.__convert_aq_units_to_log_friendly(yi, rows=x)
unit_type, unit = self.__get_unit_info(out_unit)
else:
y_plot = y_vals
else:
y_plot = y_vals
if i == 0:
subheader_previous = subheader
unit_type_previous = unit_type
if unit_type != unit_type_previous and i != 0:
msg = ("{} has a different unit of measurement ".format(yi)+""
"({}) than {} ({}). ".format(unit, yi_previous, unit_type_previous)+""
"Plotted variables must share units.")
self.err_handler.raise_exception(msg)
elif "activity" in subheader.lower() and "molality" in subheader_previous.lower():
msg = ("{} has a different unit of measurement ".format(yi)+""
"({}) than {} ({}). ".format("activity", yi_previous, "molality")+""
"Plotted variables must share units.")
self.err_handler.raise_exception(msg)
elif "molality" in subheader.lower() and "activity" in subheader_previous.lower():
msg = ("{} has a different unit of measurement ".format(yi)+""
"({}) than {} ({}). ".format("molality", yi_previous, "activity")+""
"Plotted variables must share units.")
self.err_handler.raise_exception(msg)
yi_previous = copy.deepcopy(yi)
unit_type_previous = copy.deepcopy(unit_type)
subheader_previous = copy.deepcopy(subheader)
df.loc[:, yi] = y_plot
if len(y) > 1:
if unit != "":
ylabel = "{} [{}]".format(unit_type, unit)
else:
ylabel = unit_type
else:
if 'pH' in y:
ylabel = 'pH'
elif 'Temperature' in y:
ylabel = 'Temperature [°C]'
else:
if unit != "":
ylabel = "{} {} [{}]".format(html_chemname_format_AqEquil(y[0]), unit_type, unit)
else:
ylabel = "{} {}".format(html_chemname_format_AqEquil(y[0]), unit_type)
df = pd.melt(df, id_vars=["name"], value_vars=y)
df = df.rename(columns={"Sample": "y_variable", "value": "y_value"})
df['y_variable'] = df['y_variable'].apply(html_chemname_format_AqEquil)
if unit_type == "energy supply" or unit_type == "affinity":
# get formatted reactions to display
if not isinstance(self.reactions_for_plotting, pd.DataFrame):
self.reactions_for_plotting = self.show_redox_reactions(formatted=True,
charge_sign_at_end=False,
show=False, simplify=True)
y_find = [yi.replace("_energy", "").replace("_affinity", "") for yi in y]
rxns = self.reactions_for_plotting.loc[y_find, :]["reaction"].tolist()*len(x)
if len(y) == 1:
ylabel = "{}<br>{} [{}]".format(html_chemname_format_AqEquil(y_find[0]), unit_type, unit)
# customdata for displaying reactions has to be here instead of in update_traces
fig = px.bar(df, x="name", y="y_value",
height=plot_height*ppi, width=plot_width*ppi,
color='y_variable', barmode='group',
labels={'y_value': ylabel}, template="simple_white",
color_discrete_map=dict_species_color, custom_data=[rxns])
fig.update_traces(hovertemplate = "%{x} <br>"+ylabel+": %{y}<br>%{customdata[0]}")
else:
fig = px.bar(df, x="name", y="y_value",
height=plot_height*ppi, width=plot_width*ppi,
color='y_variable', barmode='group',
labels={'y_value': ylabel}, template="simple_white",
color_discrete_map=dict_species_color)
fig.update_traces(hovertemplate = "%{x} <br>"+ylabel+": %{y}")
fig.update_layout(xaxis_tickangle=-45, xaxis_title=None,
title={'text':title, 'x':0.5, 'xanchor':'center'},
legend_title=None, margin={"t": 40},
xaxis={'fixedrange':True},
yaxis={'fixedrange':True, 'exponentformat':'power'})
if len(y) == 1:
fig.update_layout(showlegend=False)
save_as, save_format = self.__save_figure(fig, save_as, save_format,
save_scale, plot_width,
plot_height, ppi)
config = {'displaylogo': False,
'modeBarButtonsToRemove': ['zoom2d', 'pan2d', 'select2d',
'lasso2d', 'zoomIn2d', 'zoomOut2d',
'autoScale2d', 'resetScale2d',
'toggleSpikelines'],
'toImageButtonOptions': {
'format': save_format,
'filename': save_as,
'height': plot_height*ppi,
'width': plot_width*ppi,
'scale': save_scale,
},
}
if not interactive:
config['staticPlot'] = True
fig.show(config=config)
def scatterplot(self, x="pH", y="Temperature", title=None,
plot_width=4, plot_height=3, ppi=122,
fill_alpha=0.7, point_size=10,
colormap="WORM", save_as=None, save_format=None,
save_scale=1, interactive=True):
"""
Vizualize two or more sample variables with a scatterplot.
Parameters
----------
x, y : str, default for x is "pH", default for y is "Temperature"
Names of the variables to plot against each other. Valid variables
are columns in the speciation report. `y` can be a list of
of variable names for a multi-series scatterplot.
title : str, optional
Title of the plot.
plot_width, plot_height : numeric, default 4 by 3
Width and height of the plot, in inches. Size of interactive plots
is also determined by pixels per inch, set by the parameter `ppi`.
ppi : numeric, default 122
Pixels per inch. Along with `plot_width` and `plot_height`,
determines the size of interactive plots.
fill_alpha : numeric, default 0.7
Transparency of scatterpoint area fill.
point_size : numeric, default 10
Size of scatterpoints.
colormap : str, default "WORM"
Name of the colormap to color the scatterpoints. Accepts "WORM",
"colorblind", or matplotlib colormaps.
See https://matplotlib.org/stable/tutorials/colors/colormaps.html
The "colorblind" colormap is referenced from Wong, B. Points of view:
Color blindness. Nat Methods 8, 441 (2011).
https://doi.org/10.1038/nmeth.1618
save_as : str, optional
Provide a filename to save this figure. Filetype of saved figure is
determined by `save_format`.
Note: interactive plots can be saved by clicking the 'Download plot'
button in the plot's toolbar.
save_format : str, default "png"
Desired format of saved or downloaded figure. Can be 'png', 'jpg',
'jpeg', 'webp', 'svg', 'pdf', 'eps', 'json', or 'html'. If 'html',
an interactive plot will be saved. Only 'png', 'svg', 'jpeg',
and 'webp' can be downloaded with the 'download as' button in the
toolbar of an interactive plot.
save_scale : numeric, default 1
Multiply title/legend/axis/canvas sizes by this factor when saving
the figure.
interactive : bool, default True
Return an interactive plot if True or a static plot if False.
"""
if not isinstance(y, list):
y = [y]
if not isinstance(x, str):
self.err_handler.raise_exception("x must be a string.")
x_col = self.lookup(x)
try:
xsubheader = x_col.columns.get_level_values(1)[0]
except:
msg = ("Could not find '{}' ".format(x)+"in the speciation "
"report. Available variables include "
"{}".format(list(set(self.report.columns.get_level_values(0)))))
self.err_handler.raise_exception(msg)
try:
x_plot = [float(x0[0]) if x0[0] != 'NA' else float("nan") for x0 in x_col.values.tolist()]
except:
msg = ("One or more the values belonging to "
"'{}' are non-numeric and cannot be plotted.".format(x_col.columns.get_level_values(0)[0]))
self.err_handler.raise_exception(msg)
try:
xunit_type, xunit = self.__get_unit_info(xsubheader)
except:
xunit_type = ""
xunit = ""
colors = get_colors(colormap, len(y), alpha=fill_alpha)
for i, yi in enumerate(y):
y_col = self.lookup(yi)
try:
subheader = y_col.columns.get_level_values(1)[0]
except:
msg = ("Could not find '{}' ".format(yi)+"in the speciation "
"report. Available variables include "
"{}".format(list(set(self.report.columns.get_level_values(0)))))
self.err_handler.raise_exception(msg)
try:
unit_type, unit = self.__get_unit_info(subheader)
except:
unit_type = ""
unit = ""
try:
y_plot = [float(y0[0]) if y0[0] != 'NA' else float("nan") for y0 in y_col.values.tolist()]
except:
msg = ("One or more the values belonging to "
"'{}' are non-numeric and cannot be plotted.".format(y_col.columns.get_level_values(0)[0]))
self.err_handler.raise_exception(msg)
if i == 0:
subheader_previous = subheader
unit_type_previous = unit_type
if unit_type != unit_type_previous and i != 0:
msg = ("{} has a different unit of measurement ".format(yi)+""
"({}) than {} ({}). ".format(unit_type, yi_previous, unit_type_previous)+""
"Plotted variables must share units.")
self.err_handler.raise_exception(msg)
elif "activity" in subheader.lower() and "molality" in subheader_previous.lower():
msg = ("{} has a different unit of measurement ".format(yi)+""
"({}) than {} ({}). ".format("activity", yi_previous, "molality")+""
"Plotted variables must share units.")
self.err_handler.raise_exception(msg)
elif "molality" in subheader.lower() and "activity" in subheader_previous.lower():
msg = ("{} has a different unit of measurement ".format(yi)+""
"({}) than {} ({}). ".format("molality", yi_previous, "activity")+""
"Plotted variables must share units.")
self.err_handler.raise_exception(msg)
yi_previous = copy.deepcopy(yi)
unit_type_previous = copy.deepcopy(unit_type)
subheader_previous = copy.deepcopy(subheader)
if len(y) > 1:
if unit != "":
ylabel = "{} [{}]".format(unit_type, unit)
else:
ylabel = unit_type
else:
if 'pH' in y:
ylabel = 'pH'
elif 'Temperature' in y:
ylabel = 'Temperature [°C]'
else:
y_formatted = html_chemname_format_AqEquil(y[0])
if unit != "":
ylabel = "{} {} [{}]".format(y_formatted, unit_type, unit)
else:
ylabel = "{} {}".format(y_formatted, unit_type)
if x == 'pH':
xlabel = 'pH'
elif x == 'Temperature':
xlabel = 'Temperature [°C]'
else:
x_formatted = html_chemname_format_AqEquil(x)
if xunit != "":
xlabel = "{} {} [{}]".format(x_formatted, xunit_type, xunit)
else:
xlabel = "{} {}".format(x_formatted, xunit_type)
# convert rgba to hex
colors = [matplotlib.colors.rgb2hex(c) for c in colors]
# map each species to its color, e.g.,
# {'CO2': '#000000', 'HCO3-': '#1699d3', 'Other': '#736ca8'}
dict_species_color = {sp:color for sp,color in zip(y, colors)}
# html format color dict key names
dict_species_color = {html_chemname_format_AqEquil(k):v for k,v in dict_species_color.items()}
df = self.lookup(["name", x]+y).copy()
df.loc[:, "name"] = df.index
df.columns = df.columns.get_level_values(0)
df = pd.melt(df, id_vars=["name", x], value_vars=y)
df = df.rename(columns={"Sample": "y_variable", "value": "y_value"})
if (unit_type == "energy supply" or unit_type == "affinity") and self.reactions_for_plotting!=None:
# get formatted reactions to display
if not isinstance(self.reactions_for_plotting, pd.DataFrame):
self.reactions_for_plotting = self.show_redox_reactions(formatted=True,
charge_sign_at_end=False,
show=False, simplify=True)
y_find = [yi.replace("_energy", "").replace("_affinity", "") for yi in y]
rxns = self.reactions_for_plotting.loc[y_find, :]["reaction"].tolist()
rxn_dict = {rxn_name:rxn for rxn_name,rxn in zip(y, rxns)}
if len(y) == 1:
ylabel = "{}<br>{} [{}]".format(html_chemname_format_AqEquil(y_find[0]), unit_type, unit)
df["formatted_rxn"] = df["y_variable"].map(rxn_dict)
else:
df["formatted_rxn"] = ""
df['y_variable'] = df['y_variable'].apply(html_chemname_format_AqEquil)
fig = px.scatter(df, x=x, y="y_value", color="y_variable",
hover_data=[x, "y_value", "y_variable", "name", "formatted_rxn"],
width=plot_width*ppi, height=plot_height*ppi,
labels={x: xlabel, "y_value": ylabel},
category_orders={"species": y},
color_discrete_map=dict_species_color,
opacity=fill_alpha,
custom_data=['name', 'formatted_rxn'],
template="simple_white")
fig.update_traces(marker=dict(size=point_size),
hovertemplate = "%{customdata[0]}<br>"+xlabel+": %{x} <br>"+ylabel+": %{y}<br>%{customdata[1]}")
fig.update_layout(legend_title=None,
title={'text':title, 'x':0.5, 'xanchor':'center'},
margin={"t": 40},
yaxis={'exponentformat':'power'})
if len(y) == 1:
fig.update_layout(showlegend=False)
save_as, save_format = self.__save_figure(fig, save_as, save_format,
save_scale, plot_width,
plot_height, ppi)
config = {'displaylogo': False, 'scrollZoom': True,
'modeBarButtonsToRemove': ['select2d', 'lasso2d', 'toggleSpikelines', 'resetScale2d'],
'toImageButtonOptions': {
'format': save_format, # one of png, svg, jpeg, webp
'filename': save_as,
'height': plot_height*ppi,
'width': plot_width*ppi,
'scale': save_scale,
},
}
if not interactive:
config['staticPlot'] = True
fig.show(config=config)
def plot_mass_contribution(self, basis, title=None, sort_by=None,
ascending=True, sort_y_by=None, width=0.9,
colormap="WORM",
plot_width=4, plot_height=3, ppi=122,
save_as=None, save_format=None,
save_scale=1, interactive=True):
"""
Plot basis species contributions to mass balance of aqueous species
across all samples.
Parameters
----------
basis : str
Name of the basis species.
title : str, optional
Title of the plot.
sort_by : str, optional
Name of the variable used to sort samples. Variable names must be
taken from the speciation report column names. No sorting is done by
default.
ascending : bool, default True
Should sample sorting be in ascending order? Descending if False.
Ignored unless `sort_by` is defined.
sort_y_by : list of str or 'alphabetical', optional
List of species names in the order that they should be stacked, from
the bottom of the plot to the top. 'alphabetical' will sort species
alphabetically.
width : float, default 0.9
Width of bars. No space between bars if width=1.0.
colormap : str, default "WORM"
Name of the colormap to color the scatterpoints. Accepts "WORM",
"colorblind", or matplotlib colormaps.
See https://matplotlib.org/stable/tutorials/colors/colormaps.html
The "colorblind" colormap is referenced from Wong, B. Points of view:
Color blindness. Nat Methods 8, 441 (2011).
https://doi.org/10.1038/nmeth.1618
plot_width, plot_height : numeric, default 4 by 3
Width and height of the plot, in inches. Size of interactive plots
is also determined by pixels per inch, set by the parameter `ppi`.
ppi : numeric, default 122
Pixels per inch. Along with `plot_width` and `plot_height`,
determines the size of interactive plots.
save_as : str, optional
Provide a filename to save this figure. Filetype of saved figure is
determined by `save_format`.
Note: interactive plots can be saved by clicking the 'Download plot'
button in the plot's toolbar.
save_format : str, default "png"
Desired format of saved or downloaded figure. Can be 'png', 'jpg',
'jpeg', 'webp', 'svg', 'pdf', 'eps', 'json', or 'html'. If 'html',
an interactive plot will be saved. Only 'png', 'svg', 'jpeg',
and 'webp' can be downloaded with the 'download as' button in the
toolbar of an interactive plot.
save_scale : numeric, default 1
Multiply title/legend/axis/canvas sizes by this factor when saving
the figure.
interactive : bool, default True
Return an interactive plot if True or a static plot if False.
"""
try:
self.mass_contribution
except:
msg = ("Results for basis species contributions to aqueous mass "
"balance could not be found. Ensure that "
"get_mass_contribution = True when running speciate().")
self.err_handler.raise_exception(msg)
if basis not in set(self.mass_contribution['basis']):
msg = ("The basis species {} ".format(basis)+"could not be found "
"among available basis species: "
"{}".format(str(list(set(self.mass_contribution['basis'])))))
self.err_handler.raise_exception(msg)
df_sp = copy.deepcopy(self.mass_contribution.loc[self.mass_contribution['basis'] == basis])
if sort_by != None:
if sort_by in self.report.columns.get_level_values(0):
sort_col = self.lookup(sort_by)
sort_by_unit = sort_col.columns.get_level_values(1)[0]
sort_index = sort_col.sort_values([(sort_by, sort_by_unit)], ascending=ascending).index
df_list = []
for i in sort_index:
df_list.append(df_sp[df_sp['sample']==i])
df_sp = pd.concat(df_list)
else:
msg = ("Could not find {}".format(sort_by)+" in the "
"speciation report. Available variables include "
"{}".format(list(self.report.columns.get_level_values(0))))
self.err_handler.raise_exception(msg)
df_sp['percent'] = df_sp['percent'].astype(float)
unique_species = self.__unique(df_sp["species"])
if "Other" in unique_species:
unique_species.append(unique_species.pop(unique_species.index("Other")))
labels = self.__unique(df_sp["sample"])
bottom = np.array([0]*len(labels))
if sort_y_by != None:
if isinstance(sort_y_by, list):
if len(unique_species) == len(sort_y_by):
if len([s for s in unique_species if s in sort_y_by]) == len(unique_species) and len([s for s in sort_y_by if s in unique_species]) == len(unique_species):
unique_species = sort_y_by
else:
valid_needed = [s for s in unique_species if s not in sort_y_by]
invalid = [s for s in sort_y_by if s not in unique_species]
msg = ("sort_y_by is missing the following species: "
"{}".format(valid_needed)+" and was provided "
"these invalid species: {}".format(invalid))
self.err_handler.raise_exception(msg)
elif len(sort_y_by) < len(unique_species):
msg = ("sort_y_by must have of all of the "
"following species: {}".format(unique_species)+". "
"You are missing {}".format([s for s in unique_species if s not in sort_y_by]))
self.err_handler.raise_exception(msg)
else:
msg = ("sort_y_by can only have the "
"following species: {}".format(unique_species)+".")
self.err_handler.raise_exception(msg)
elif sort_y_by == "alphabetical":
if "Other" in unique_species:
unique_species_no_other = [sp for sp in unique_species if sp != "Other"]
unique_species_no_other = sorted(unique_species_no_other)
unique_species = unique_species_no_other + ["Other"]
else:
unique_species = sorted(unique_species)
else:
self.err_handler.raise_exception("sort_y_by must be either None, 'alphabetical', "
"or a list of species names.")
# get colormap
colors = get_colors(colormap, len(unique_species))
# convert rgba to hex
colors = [matplotlib.colors.rgb2hex(c) for c in colors]
df_sp["species"] = df_sp["species"].apply(html_chemname_format_AqEquil)
unique_species = [html_chemname_format_AqEquil(sp) for sp in unique_species]
# map each species to its color, e.g.,
# {'CO2': '#000000', 'HCO3-': '#1699d3', 'Other': '#736ca8'}
dict_species_color = {sp:color for sp,color in zip(unique_species, colors)}
if title == None:
title = '<span style="font-size: 14px;">Species accounting for mass balance of {}</span>'.format(html_chemname_format_AqEquil(basis))
fig = px.bar(df_sp, x="sample", y="percent", color="species",
width=plot_width*ppi, height=plot_height*ppi,
labels={"sample": "sample", "percent": "mole %", "species": "species"},
category_orders={"species": unique_species, "sample": labels},
color_discrete_map=dict_species_color,
template="simple_white",
)
fig.update_layout(xaxis_tickangle=-45, xaxis_title=None, legend_title=None,
title={'text':title, 'x':0.5, 'xanchor':'center'},
margin={"t": 40}, bargap=0, xaxis={'fixedrange':True},
yaxis={'fixedrange':True})
fig.update_traces(width=width, marker_line_width=0)
save_as, save_format = self.__save_figure(fig, save_as, save_format,
save_scale, plot_width,
plot_height, ppi)
config = {'displaylogo': False,
'modeBarButtonsToRemove': ['zoom2d', 'pan2d', 'select2d',
'lasso2d', 'zoomIn2d', 'zoomOut2d',
'autoScale2d', 'resetScale2d',
'toggleSpikelines'],
'toImageButtonOptions': {
'format': save_format, # one of png, svg, jpeg, webp
'filename': save_as,
'height': plot_height*ppi,
'width': plot_width*ppi,
'scale': save_scale,
},
}
if not interactive:
config['staticPlot'] = True
fig.show(config=config)
def plot_solid_solutions(self, sample, title=None,
width=0.9, colormap="WORM",
affinity_plot=True,
affinity_plot_colors=["blue", "orange"],
plot_width=4, plot_height=4, ppi=122,
save_as=None, save_format=None,
save_scale=1, interactive=True):
"""
Plot fractions of minerals of hypothetical solid solutions in a sample.
Parameters
----------
sample : str
Name of the sample.
title : str, optional
Title of the plot.
width : float, default 0.9
Width of bars. No space between bars if width=1.0.
colormap : str, default "WORM"
Name of the colormap to color the scatterpoints. Accepts "WORM",
"colorblind", or matplotlib colormaps.
See https://matplotlib.org/stable/tutorials/colors/colormaps.html
The "colorblind" colormap is referenced from Wong, B. Points of view:
Color blindness. Nat Methods 8, 441 (2011).
https://doi.org/10.1038/nmeth.1618
affinity_plot : bool, default True
Include the affinity subplot?
affinity_plot_colors : list of two str, default ["blue", "orange"]
Colors indicating positive and negative values in the affinity
subplot, respectively.
plot_width, plot_height : numeric, default 4 by 3
Width and height of the plot, in inches. Size of interactive plots
is also determined by pixels per inch, set by the parameter `ppi`.
ppi : numeric, default 122
Pixels per inch. Along with `plot_width` and `plot_height`,
determines the size of interactive plots.
save_as : str, optional
Provide a filename to save this figure. Filetype of saved figure is
determined by `save_format`.
Note: interactive plots can be saved by clicking the 'Download plot'
button in the plot's toolbar.
save_format : str, default "png"
Desired format of saved or downloaded figure. Can be 'png', 'jpg',
'jpeg', 'webp', 'svg', 'pdf', 'eps', 'json', or 'html'. If 'html',
an interactive plot will be saved. Only 'png', 'svg', 'jpeg',
and 'webp' can be downloaded with the 'download as' button in the
toolbar of an interactive plot.
save_scale : numeric, default 1
Multiply title/legend/axis/canvas sizes by this factor when saving
the figure.
interactive : bool, default True
Return an interactive plot if True or a static plot if False.
"""
if sample not in self.sample_data.keys():
msg = ("The sample "+sample+" was not found in this speciation dataset."
" Samples with solid solutions in this dataset include:"+str([s for s in self.sample_data.keys() if "solid_solutions" in self.sample_data[s].keys()]))
self.err_handler.raise_exception(msg)
try:
self.sample_data[sample]["solid_solutions"]
except:
msg = ("Results for solid solutions could not be found for this "
"sample. Samples with solid solutions in this speciation "
"dataset include:"+str([s for s in self.sample_data.keys() if "solid_solutions" in self.sample_data[s].keys()]))
self.err_handler.raise_exception(msg)
if title == None:
title = "Hypothetical solid solutions in " + sample
df_full = copy.deepcopy(self.sample_data[sample]["solid_solutions"])
df = copy.deepcopy(df_full.dropna(subset=['x']))
df = df[df['x'] != 0]
unique_minerals = self.__unique(df["mineral"])
# get colormap
colors = get_colors(colormap, len(unique_minerals))
# convert rgba to hex
colors = [matplotlib.colors.rgb2hex(c) for c in colors]
# map each species to its color, e.g.,
# {'CO2': '#000000', 'HCO3-': '#1699d3', 'Other': '#736ca8'}
dict_minerals_color = {sp:color for sp,color in zip(unique_minerals, colors)}
solid_solutions = list(dict.fromkeys(df["solid solution"]))
df_ss_only = df_full[df_full["x"].isnull()]
mineral_dict = {m:[] for m in unique_minerals}
for ss in solid_solutions:
for m in unique_minerals:
df_sub = df.loc[df["solid solution"] == ss,]
frac = df_sub.loc[df_sub["mineral"] == m, "x"]
if len(frac) > 0:
mineral_dict[m] = mineral_dict[m] + list(frac)
else:
mineral_dict[m].append(0)
if affinity_plot:
rows = 2
specs = [[{"type": "bar"}], [{"type": "bar"}]]
else:
rows = 1
specs = [[{"type": "bar"}]]
fig = make_subplots(
rows=rows, cols=1,
specs=specs,
vertical_spacing = 0.05
)
# subplot 1
for m in unique_minerals[::-1]:
fig.add_trace(go.Bar(name=m, x=solid_solutions, y=mineral_dict[m], marker_color=dict_minerals_color[m]), row=1, col=1)
# subplot 2
if affinity_plot:
fig.add_trace(go.Bar(name="ss", x=solid_solutions, y=df_ss_only["Aff, kcal"],
marker_color=[affinity_plot_colors[0] if val > 0 else affinity_plot_colors[1] for val in df_ss_only["Aff, kcal"]],
showlegend=False),
row=2, col=1)
fig.update_layout(barmode='stack', xaxis_tickangle=-45, xaxis_title=None, legend_title=None,
title={'text':title, 'x':0.5, 'xanchor':'center'}, autosize=False,
width=plot_width*ppi, height=plot_height*ppi,
margin={"t": 40}, bargap=0, xaxis={'fixedrange':True},
yaxis={'fixedrange':True}, template="simple_white")
fig.update_xaxes(tickangle=-45)
fig['layout']['yaxis']['title']='Mole Fraction'
if affinity_plot:
fig['layout']['yaxis2']['title']='Affinity, kcal/mol'
fig.update_xaxes(showticklabels=False) # hide all the xticks
fig.update_xaxes(showticklabels=True, row=2, col=1)
save_as, save_format = self.__save_figure(fig, save_as, save_format,
save_scale, plot_width,
plot_height, ppi)
config = {'displaylogo': False,
'modeBarButtonsToRemove': ['zoom2d', 'pan2d', 'select2d',
'lasso2d', 'zoomIn2d', 'zoomOut2d',
'autoScale2d', 'resetScale2d',
'toggleSpikelines'],
'toImageButtonOptions': {
'format': save_format, # one of png, svg, jpeg, webp
'filename': save_as,
'height': plot_height*ppi,
'width': plot_width*ppi,
'scale': save_scale,
},
}
if not interactive:
config['staticPlot'] = True
fig.show(config=config)
class AqEquil:
"""
Class containing functions to speciate aqueous water chemistry data using
existing or custom thermodynamic datasets.
Parameters
----------
eq36da : str, defaults to path given by the environment variable EQ36DA
Path to directory where data1 files are stored.
eq36co : str, defaults to path given by the environment variable EQ36CO
Path to directory where EQ3 executables are stored.
Attributes
----------
eq36da : str
Path to directory where data1 files are stored.
eq36co : str
Path to directory where EQ3 executables are stored.
df_input_processed : pd.DataFrame
Pandas dataframe containing user-supplied sample chemistry data that has
been processed by `speciate`.
half_cell_reactions : pd.DataFrame
Pandas dataframe containing half cell reactions that can be combined
into redox reactions for calculating chemical affinity and energy supply
values during speciation.
redox_pairs : list of int
List of indices of half reactions in the `half_cell_reactions` table
to be combined when generating full redox reactions.
affinity_energy_reactions_raw : str
A formatted TSV string of redox reactions for calculating chemical
affinities and energy supplies during speciation.
affinity_energy_reactions_table : pd.DataFrame
A table of redox reactions for calculating chemical affinities and
energy supplies during speciation.
affinity_energy_formatted_reactions : pd.DataFrame
A pandas dataframe containing balanced redox reactions written in full.
verbose : int, 0, 1, or 2, default 1
Level determining how many messages are returned during a
calculation. 2 for all messages, 1 for errors or warnings only,
0 for silent.
"""
def __init__(self,
eq36da=os.environ.get('EQ36DA'),
eq36co=os.environ.get('EQ36CO'),
hide_traceback=True):
self.eq36da = eq36da
self.eq36co = eq36co
self.df_input_processed = None
half_rxn_data = pkg_resources.resource_stream(__name__, "half_cell_reactions.csv")
self.half_cell_reactions = pd.read_csv(half_rxn_data) #define the input file (dataframe of redox pairs)
self.redox_pairs = None
self.affinity_energy_reactions_raw = None
self.affinity_energy_reactions_table = None
self.affinity_energy_formatted_reactions = None
self.verbose = 1
self.hide_traceback = hide_traceback
self.err_handler = Error_Handler(clean=self.hide_traceback)
os.environ['EQ36DA'] = self.eq36da # set eq3 db directory
os.environ['EQ36CO'] = self.eq36co # set eq3 .exe directory
def __capture_r_output(self):
"""
Capture and create a list of R console messages
"""
# Record output #
self.stdout = []
self.stderr = []
if not DEBUGGING_R:
# Dummy functions #
def add_to_stdout(line): self.stdout.append(line)
def add_to_stderr(line): self.stderr.append(line)
# Keep the old functions #
self.stdout_orig = rpy2.rinterface_lib.callbacks.consolewrite_print
self.stderr_orig = rpy2.rinterface_lib.callbacks.consolewrite_warnerror
# Set the call backs #
rpy2.rinterface_lib.callbacks.consolewrite_print = add_to_stdout
rpy2.rinterface_lib.callbacks.consolewrite_warnerror = add_to_stderr
def __file_exists(self, filename, ext='.csv'):
"""
Check that a file exists and that it has the correct extension.
Returns True if so, raises exception if not.
"""
ext_dict = {
".csv" : "comma separated values (.csv)",
".txt" : "standard text (.txt)",
".rds" : "R Data (.rds)",
}
if ext in filename[-4:]:
if os.path.exists(filename) and os.path.isfile(filename):
return True
else:
err = "Cannot locate input file {}/{}".format(os.getcwd(), filename)
self.err_handler.raise_exception(err)
else:
err = ("Input file {}".format(filename) + " "
"must be in {} format.".format(ext_dict[ext]))
self.err_handler.raise_exception(err)
return False
def _check_database_file(self, filename):
"""
Check for problems in the thermodynamic database CSV.
"""
# is the file a csv?
self.__file_exists(filename)
thermo_df = pd.read_csv(filename)
# does this file have the proper headers?
required_headers = ["name", "abbrv", "formula", "state",
"ref1", "ref2", "date", "E_units",
"G", "H", "S", "Cp", "V",
"a1.a", "a2.b", "a3.c", "a4.d", "c1.e", "c2.f",
"omega.lambda", "z.T",
"azero", "neutral_ion_type",
"dissrxn", "tag", "formula_ox"]
missing_headers = []
for header in required_headers:
if header not in thermo_df.columns:
missing_headers.append(header)
if len(missing_headers) > 0:
msg = ("The thermodynamic database file '{}'".format(filename)+" "
"is missing one or more required columns: "
"{}".format(", ".join(missing_headers))+". "
"Are these headers spelled correctly in the file?")
self.err_handler.raise_exception(msg)
# does Cl-, O2(g), and O2 exist in the file?
required_species = ["Cl-", "O2", "O2(g)"]
missing_species = []
for species in required_species:
if species not in list(thermo_df["name"]):
missing_species.append(species)
if len(missing_species) > 0:
msg = ("The thermodynamic database file '{}'".format(filename)+" "
"is missing required species:"
"{}".format(missing_species)+". Default thermodynamic values"
" will be used.")
warnings.warn(msg)
return
def _check_sample_input_file(self, input_filename, exclude, db, custom_db,
charge_balance_on, suppress_missing):
"""
Check for problems in sample input file.
"""
# does the input file exist? Is it a CSV?
if self.__file_exists(input_filename):
df_in = pd.read_csv(input_filename, header=None) # no headers for now so colname dupes can be checked
else:
self.err_handler.raise_exception("_check_sample_input() error!")
# are there any samples?
if df_in.shape[0] <= 2:
err_no_samples = ("The file {}".format(input_filename) + " "
"must contain at least three rows: the "
"first for column names, the second for column subheaders, "
"followed by one or more rows for sample data.")
self.err_handler.raise_exception(err_no_samples)
err_list = [] # for appending errors found in the sample input file
# get header list
col_list = list(df_in.iloc[0, 1:])
# are there blank headers?
if True in [isinstance(x, float) and x != x for x in col_list]:
# isinstance(x, float) and x != x is a typesafe way to check for nan
err_blank_header = ("One or more columns in the sample input "
"file have blank headers. These might be empty columns. "
"Only the first column may have a blank header. Remove any "
"empty columns and/or give each header a name.")
self.err_handler.raise_exception(err_blank_header)
# are there duplicate headers?
dupe_cols = list(set([x for x in col_list if col_list.count(x) > 1]))
if len(dupe_cols) > 0:
err_dupe_cols = ("Duplicate column names are not allowed. "
"Duplicate column names were found for:\n"
"{}".format(str(dupe_cols)))
err_list.append(err_dupe_cols)
df_in.columns = df_in.iloc[0] # set column names
df_in = df_in.drop(df_in.index[0], axis=0) # drop column name row
df_in_headercheck = copy.deepcopy(df_in.iloc[:,1:]) # drop first column. Deepcopy slice because drop() doesn't work well with unnamed columns.
# drop excluded headers
for exc in exclude:
if exc == df_in.columns[0]: # skip if 'sample' column is excluded
continue
try:
df_in_headercheck = df_in_headercheck.drop(exc, axis=1) # drop excluded columns
except:
err_bad_exclude = ("Could not exclude the header '{}'".format(exc)+". "
"This header could not be found in {}".format(input_filename)+"")
err_list.append(err_bad_exclude)
# get row list
row_list = list(df_in.iloc[1:, 0])
# are there blank rows?
if True in [isinstance(x, float) and x != x for x in row_list]:
# isinstance(x, float) and x != x is a typesafe way to check for nan
err_blank_row = ("One or more rows in the sample input "
"file have blank sample names. These might be empty rows. "
"Remove any empty rows and/or give each sample a name. Sample "
"names go in the first column.")
self.err_handler.raise_exception(err_blank_row)
# are there duplicate rows?
dupe_rows = list(set([x for x in row_list if row_list.count(x) > 1]))
if len(dupe_rows) > 0:
err_dupe_rows = ("Duplicate sample names are not allowed. "
"Duplicate sample names were found for:\n"
"{}".format(str(dupe_rows)))
err_list.append(err_dupe_rows)
# are there any leading or trailing spaces in sample names?
invalid_sample_names = [n for n in list(df_in.iloc[1:, 0]) if str(n[0])==" " or str(n[-1])==" "]
if len(invalid_sample_names) > 0:
err_sample_leading_trailing_spaces = ("The following sample names "
"have leading or trailing spaces. Remove spaces and try again: "
"{}".format(invalid_sample_names))
self.err_handler.raise_exception(err_sample_leading_trailing_spaces)
# are column names valid entries in the database?
if custom_db:
data0_path = "data0." + db
else:
data0_path = self.eq36da + "/data0." + db
if os.path.exists(data0_path) and os.path.isfile(data0_path):
with open(data0_path) as data0:
data0_lines = data0.readlines()
start_index = [i+1 for i, s in enumerate(data0_lines) if '* species name' in s]
end_index = [i-1 for i, s in enumerate(data0_lines) if 'elements' in s]
db_species = [i.split()[0] for i in data0_lines[start_index[0]:end_index[0]]]
if charge_balance_on == 'pH':
err_charge_balance_on_pH = ("To balance charge on pH, use "
"charge_balance_on='H+'")
err_list.append(err_charge_balance_on_pH)
elif charge_balance_on in ['Temperature', 'logfO2']:
err_charge_balance_invalid_type = ("Cannot balance charge "
"on {}.".format(charge_balance_on))
err_list.append(err_charge_balance_invalid_type)
elif charge_balance_on != "none" and charge_balance_on not in list(set(df_in_headercheck.columns)):
err_charge_balance_invalid_sp = ("The species chosen for charge balance"
" '{}'".format(charge_balance_on)+""
" was not found among the headers of the sample input file.")
err_list.append(err_charge_balance_invalid_sp)
for species in list(dict.fromkeys(df_in_headercheck.columns)):
if species not in db_species and species not in ['Temperature', 'logfO2', 'pH']:
err_species_not_in_db = ("The species '{}'".format(species) + " "
"was not found in {}".format(data0_path) + ". "
"If the column contains data that should not be "
"included in the speciation calculation, add the "
"column name to the 'exclude' argument. Try "
"help(AqEquil.AqEquil.speciate) "
"for more information about 'exclude'.")
err_list.append(err_species_not_in_db)
elif species == 'pH':
err_species_pH = ("Please rename the 'pH' column in "
"the sample input file to 'H+' with the subheader "
"unit 'pH'.")
err_list.append(err_species_pH)
else:
err_no_data0 = ("Could not locate {}.".format(data0_path) + " "
"Unable to determine if column headers included in "
"{} ".format(input_filename) + "match entries for species "
"in the requested thermodynamic database '{}'.".format(db))
err_list.append(err_no_data0)
# are subheader units valid?
subheaders = df_in_headercheck.iloc[0,]
valid_subheaders = ["degC", "ppm", "ppb", "Suppressed", "Molality",
"Molarity", "mg/L", "mg/kg.sol", "Alk., eq/kg.H2O",
"Alk., eq/L", "Alk., eq/kg.sol", "Alk., mg/L CaCO3",
"Alk., mg/L HCO3-", "Log activity", "Log act combo",
"Log mean act", "pX", "pH", "pHCl", "pmH", "pmX",
"Hetero. equil.", "Homo. equil.", "Make non-basis",
"logfO2", "Mineral"]
for i, subheader in enumerate(subheaders):
if subheader not in valid_subheaders:
err_valid_sub = ("The subheader '{}'".format(subheader) + " "
"for the column '{}'".format(df_in_headercheck.columns[i]) + " "
"is not recognized. Valid subheaders are {}".format(str(valid_subheaders)) + ". "
"If the column {}".format(df_in_headercheck.columns[i]) + " "
"contains data that is not meant for the "
"speciation calculation, add the column name "
"to the 'exclude' argument. Try help(AqEquil.AqEquil.speciate) "
"for more information about 'exclude'.")
err_list.append(err_valid_sub)
# is a 'Temperature' column present?
if "Temperature" not in df_in_headercheck.columns and "Temperature" not in exclude:
err_temp = ("The column 'Temperature' was not found in the input file. "
"Please include a column with 'Temperature' in the first row, "
"'degC' in the second row, and a temperature value for each "
"sample in degrees Celsius.")
err_list.append(err_temp)
# raise an exception that summarizes all errors found
if len(err_list) > 0:
errs = "\n\n*".join(err_list)
errs = ("The input file {}".format(input_filename)+" encountered"
" errors:\n\n*" + errs)
self.err_handler.raise_exception(errs)
return
def __clear_eqpt_extra_output(self):
"""
Deletes all EQPT output except data1.
"""
if os.path.exists("eqpt_log.txt") and os.path.isfile("eqpt_log.txt"):
os.remove("eqpt_log.txt")
if os.path.exists("data1f.txt") and os.path.isfile("data1f.txt"):
os.remove("data1f.txt")
if os.path.exists("slist.txt") and os.path.isfile("slist.txt"):
os.remove("slist.txt")
def runeqpt(self, db, extra_eqpt_output=False):
"""
Convert a data0 into a data1 file with EQPT.
Parameters
----------
db : str
Three letter code of database.
extra_eqpt_output : bool, default False
Keep additional output files from EQPT? These files include
eqpt_log.txt, data1f.txt, and slist.txt.
"""
if os.path.exists("data0."+db) and os.path.isfile("data0."+db):
pass
else:
self.err_handler.raise_exception(" ".join(["Error: could not locate custom database",
"data0.{} in {}.".format(db, os.getcwd())]))
if os.path.exists("data1."+db) and os.path.isfile("data1."+db):
os.remove("data1."+db)
self.__clear_eqpt_extra_output()
os.environ['EQ36DA'] = os.getcwd()
args = ['/bin/csh', self.eq36co+'/runeqpt', db]
try:
self.__run_script_and_wait(args) # run EQPT
except:
os.environ['EQ36DA'] = self.eq36da
self.err_handler.raise_exception(
"Error: EQPT failed to run on {}.".format("data0."+db))
if os.path.exists("data1") and os.path.isfile("data1"):
os.rename("data1", "data1."+db)
if os.path.exists("output") and os.path.isfile("output"):
os.rename("output", "eqpt_log.txt")
if os.path.exists("data1f") and os.path.isfile("data1f"):
os.rename("data1f", "data1f.txt")
if os.path.exists("slist") and os.path.isfile("slist"):
os.rename("slist", "slist.txt")
if os.path.exists("data1."+db) and os.path.isfile("data1."+db):
if self.verbose > 0:
print("Successfully created a data1."+db+" from data0."+db)
else:
msg = ("EQPT could not create data1."+db+" from "
"data0."+db+". Check eqpt_log.txt for details.")
self.err_handler.raise_exception(msg)
if not extra_eqpt_output:
self.__clear_eqpt_extra_output()
os.environ['EQ36DA'] = self.eq36da # reset default EQ36 db path
def runeq3(self, filename_3i, db,
samplename=None,
path_3i=os.getcwd(),
path_3o=os.getcwd(),
path_3p=os.getcwd()):
"""
Call EQ3 on a .3i input file.
Parameters
----------
filename_3i : str
Name of 3i input file.
db : str
Three letter code of database.
path_3i : path str, default current working directory
Path of .3i input files.
path_3o : path str, default current working directory
Path of .3o output files.
path_3p : path str, default current working directory
Path of .3p pickup files.
"""
# get current working dir
cwd = os.getcwd()
if samplename == None:
samplename = filename_3i[:-3]
if self.verbose > 0:
print('Using ' + db + ' to speciate ' + samplename)
os.chdir(path_3i) # step into 3i folder
args = ['/bin/csh', self.eq36co+'/runeq3', db, filename_3i]
self.__run_script_and_wait(args) # run EQ3
# restore working dir
os.chdir(cwd)
filename_3o = filename_3i[:-1] + 'o'
filename_3p = filename_3i[:-1] + 'p'
try:
# rename output
os.rename(path_3i + '/output', path_3i + "/" + filename_3o)
except:
if self.verbose > 0:
print('Error: EQ3 failed to produce output for ' + filename_3i)
try:
# move output
shutil.move(path_3i + "/" + filename_3o,
path_3o + "/" + filename_3o)
except:
if self.verbose > 0:
print('Error: Could not move', filename_3o, "to", path_3o)
try:
# rename pickup
os.rename(path_3i + '/pickup', path_3i + "/" + filename_3p)
move_pickup = True
except:
if self.verbose > 0:
print('Error: EQ3 failed to produce a pickup file for ' + filename_3i)
move_pickup = False
if move_pickup:
try:
# move pickup
shutil.move(path_3i + "/" + filename_3p,
path_3p + "/" + filename_3p)
except:
if self.verbose > 0:
print('Error: Could not move', filename_3p, "to", path_3p)
def runeq6(self, filename_6i, db,
samplename=None,
path_6i=os.getcwd(),
path_6o=os.getcwd(),
path_6p=os.getcwd()):
"""
Call EQ6 on a .6i input file.
Parameters
----------
filename_6i : str
Name of 6i input file.
db : str
Three letter code of database.
path_6i : path str, default current working directory
Path of .6i input files.
path_6o : path str, default current working directory
Path of .6o output files.
path_6p : path str, default current working directory
Path of .6p pickup files.
"""
# get current working dir
cwd = os.getcwd()
if samplename == None:
samplename = filename_6i[:-3]
if self.verbose > 0:
print('Using ' + db + ' to speciate ' + samplename)
os.chdir(path_6i) # step into 6i folder
args = ['/bin/csh', self.eq36co+'/runeq6', db, filename_6i]
self.__run_script_and_wait(args) # run EQ6
# restore working dir
os.chdir(cwd)
filename_6o = filename_6i[:-1] + 'o'
filename_6p = filename_6i[:-1] + 'p'
try:
# rename output
os.rename(path_6i + '/output', path_6i + "/" + filename_6o)
except:
if self.verbose > 0:
print('Error: EQ6 failed to produce output for ' + filename_6i)
try:
# move output
shutil.move(path_6i + "/" + filename_6o,
path_6o + "/" + filename_6o)
except:
if self.verbose > 0:
print('Error: Could not move', filename_6o, "to", path_6o)
try:
# rename pickup
os.rename(path_6i + '/pickup', path_6i + "/" + filename_6p)
move_pickup = True
except:
if self.verbose > 0:
print('Error: EQ6 failed to produce a pickup file for ' + filename_6i)
move_pickup = False
if move_pickup:
try:
# move pickup
shutil.move(path_6i + "/" + filename_6p,
path_6p + "/" + filename_6p)
except:
if self.verbose > 0:
print('Error: Could not move', filename_6p, "to", path_6p)
def __mk_check_del_directory(self, path):
"""
Checks for the dir being created. If it is already present, delete it
before recreating it.
"""
if not os.path.exists(path):
os.makedirs(path)
else:
shutil.rmtree(path)
os.makedirs(path)
def __read_inputs(self, file_type, location):
"""
Finds all files of a filetype in all downstream folders.
"""
file_name = [] # file names
file_list = [] # file names with paths
for root, dirs, files in os.walk(location):
for file in files:
if file.endswith(file_type):
if "-checkpoint" not in file:
file_name.append(file)
file_list.append(os.path.join(root, file))
return file_name, file_list
def __run_script_and_wait(self, args):
"""
Runs shell commands.
"""
# DEVNULL and STDOUT needed to suppress all warnings
subprocess.Popen(args, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT).wait()
def _delete_rxn_folders(self):
"""
Deletes folders storing raw EQ3/6 input and output.
"""
if os.path.exists('rxn_3i') and os.path.isdir('rxn_3i'):
shutil.rmtree('rxn_3i')
if os.path.exists('rxn_3o') and os.path.isdir('rxn_3o'):
shutil.rmtree('rxn_3o')
if os.path.exists('rxn_3p') and os.path.isdir('rxn_3p'):
shutil.rmtree('rxn_3p')
if os.path.exists('rxn_6i') and os.path.isdir('rxn_6i'):
shutil.rmtree('rxn_6i')
if os.path.exists('rxn_6o') and os.path.isdir('rxn_6o'):
shutil.rmtree('rxn_6o')
if os.path.exists('rxn_6p') and os.path.isdir('rxn_6p'):
shutil.rmtree('rxn_6p')
def speciate(self,
input_filename,
db="wrm",
redox_flag="logfO2",
redox_aux="Fe+3",
default_logfO2=-6,
exclude=[],
suppress=[],
alter_options=[],
charge_balance_on="none",
suppress_missing=True,
verbose=1,
report_filename=None,
get_aq_dist=True,
aq_dist_type="log_activity",
get_mass_contribution=True,
mass_contribution_other=True,
get_mineral_sat=True,
mineral_sat_type="affinity",
get_redox=True,
redox_type="Eh",
get_ion_activity_ratios=True,
get_fugacity=True,
get_basis_totals=True,
get_solid_solutions=True,
get_affinity_energy=False,
rxn_filename=None,
not_limiting=["H+", "OH-", "H2O"],
get_charge_balance=True,
custom_db=False,
extra_eqpt_output=False,
batch_3o_filename=None,
delete_generated_folders=False,
custom_obigt=None):
"""
Calculate the equilibrium distribution of chemical species in solution.
Additionally, calculate chemical affinities and energy supplies for
user-specified reactions.
Parameters
----------
input_filename : str
User-supplied utf8-encoded comma separated value (csv) file
containing sample data intended for speciation. The file must
follow this format:
- the first row is a header row that must contain the names of the
species to be included in the speciation calculation. There
cannot be duplicate headers.
- the second row must contain subheaders for each species in the
header row. These subheaders must be taken from the following:
degC
ppm
ppb
Suppressed
Molality
Molarity
mg/L
mg/kg.sol
Alk., eq/kg.H2O
Alk., eq/L
Alk., eq/kg.sol
Alk., mg/L CaCO3
Alk., mg/L HCO3-
Log activity
Log act combo
Log mean act
pX
pH
pHCl
pmH
pmX
Hetero. equil.
Homo. equil.
Make non-basis
- 'Temperature' must be included as a header, with 'degC' as its
subheader.
- The first column must contain sample names. There cannot be
duplicate sample names.
db : three letter str, default "wrm"
Three letter file extension for the desired thermodynamic database.
If `custom_db` is False, this database must be named data1.xyz
(where xyz is your desired three letter extension) and located
in the EQ3/6 'EQ36DA' path. Otherwise, the database must be named
data0.xyz and located in your current working directory. Note that
data1 files are already compiled by EQPT, while data0 files will be
automatically compiled for you if `custom_db` is True.
redox_flag : str, default "O2(g)"
Determines which column in the sample input file sets the overall
redox state of the samples. Options for redox_flag include 'O2(g)',
'pe', 'Eh', 'logfO2', and 'redox aux'. The code will search your
sample spreadsheet file (see `filename`) for a column corresponding
to the option you chose:
* 'O2(g)' with a valid subheader for a gas
* 'pe' with subheader pe
* 'Eh' with subheader volts
* 'logfO2' with subheader logfO2
* 'redox aux' will search for a column corresponding to the
auxilliary basis species selected to form a redox couple with its
linked strict basis species (see `redox_aux`). For example, the
redox couple Fe+2/Fe+3 would require a column named Fe+3
If an appropriate header or redox data cannot be found to define
redox state, `default_logfO2` is used to set sample logfO2.
There is a special case where dissolved oxygen can be used to impose
sample redox state if `redox_flag` is set to logfO2 and a column named
logfO2 does not appear in your sample spreadsheet. If there is a
column corresponding to dissolved oxygen measurements, logfO2 is
calculated from the equilibrium reaction O2(aq) = O2(g) at the
temperature and pressure of the sample using the revised Helgeson-
Kirkham-Flowers (HKF) equation of state (JC Tanger IV and HC
Helgeson, Am. J. Sci., 1988, 288, 19).
redox_aux : default "Fe+3", optional
Ignored unless `redox_flag` equals 1. Name of the auxilliary species
whose reaction links it to a basis species (or another auxilliary
species) such that they form a redox couple that controls sample
fO2. For instance, Fe+3 is linked to Fe+2 in many supporting data
files, so selecting `redox_flag` = 1 and `redox_aux` = "Fe+3" will
set sample fO2 based on the Fe+2/Fe+3 redox couple.
default_logfO2 : float, default -6
Default value for sample logfO2 in case redox data cannot be found
in the user-supplied sample spreadsheet.
exclude : list of str, default []
Names of columns in the user-supplied sample spreadsheet that should
not be considered aqueous species. Useful for excluding columns
containing sample metatadata, such as "Year" and "Location".
suppress : list of str, default []
Names of chemical species that will be prevented from forming in the
speciation calculation.
alter_options : list, default []
A list of lists, e.g.,
[["CaOH+", "Suppress"], ["CaCl+", "AugmentLogK", -1]]
The first element of each interior list is the name of a species.
The second element is an option to alter the species, and can be:
- Suppress : suppress the formation of the species. (See also:
`suppress`).
- Replace : replace the species' log K value with a desired value.
- AugmentLogK : augment the value of the species' log K.
- AugmentG : augment the Gibbs free energy of the species by a
desired value, in kcal/mol.
The third element is a numeric value corresponding to the chosen
option. A third element is not required for Suppress.
charge_balance_on : str, default "none"
If "none", will not balance electrical charge between cations and
anions in the speciation calculation. If a name of a species is
supplied instead, the activity of that species will be allowed to
change until charge balance is obtained. For example,
charge_balance_on = "H+" will calculate what pH a sample must have
to have zero net charge.
suppress_missing : bool, default True
Suppress the formation of an aqueous species if it is missing a
value in the user-supplied sample spreadsheet?
verbose : int, 0, 1, or 2, default 1
Level determining how many messages are returned during a
calculation. 2 for all messages, 1 for errors or warnings only,
0 for silent.
report_filename : str, optional
Name of the comma separated values (csv) report file generated when
the calculation is complete. If this argument is not defined, a
report file is not generated.
get_aq_dist : bool, default True
Calculate distributions of aqueous species?
aq_dist_type : str, default "log_activity"
Desired units of measurement for reported distributions of aqueous
species. Can be "molality", "log_molality", "log_gamma", or
"log_activity". Ignored if `get_aq_dist` is False.
get_mass_contribution : bool, default True
Calculate basis species contributions to mass balance of aqueous
species?
mass_contribution_other : bool, default True
Include an "other" species for the sake of summing percents of basis
species contributions to 100%? Ignored if `get_mass_contribution` is
False.
get_mineral_sat : bool, default True
Calculate saturation states of pure solids?
mineral_sat_type : str, default "affinity"
Desired units of measurement for reported saturation states of pure
solids. Can be "logQoverK" or "affinity". Ignored if
`get_mineral_sat` is False.
get_redox : bool, default True
Calculate potentials of redox couples?
redox_type : str, default "Eh"
Desired units of measurement for reported redox potentials. Can be
"Eh", "pe", "logfO2", or "Ah". Ignored if `get_redox` is False.
get_ion_activity_ratios : bool, default True
Calculate ion/H+ activity ratios and neutral species activities?
get_fugacity : bool, default True
Calculate gas fugacities?
get_basis_totals : bool, default True
Report total compositions of basis aqueous species?
get_solid_solutions : bool, default True
Permit the calculation of solid solutions and include them in the
speciation report?
get_affinity_energy : bool, default False
Calculate affinities and energy supplies of reactions listed in a
separate user-supplied file?
rxn_filename : str, optional
Name of .txt file containing reactions used to calculate affinities
and energy supplies. Ignored if `get_affinity_energy` is False.
not_limiting : list, default ["H+", "OH-", "H2O"]
List containing names of species that are not considered limiting
when calculating energy supplies. Ignored if `get_affinity_energy`
is False.
get_charge_balance : bool, default True
Calculate charge balance and ionic strength?
custom_db : bool, default False
Is the database defined by `db` a custom user-supplied database? If
this is set to True, searches for a data0.xyz file in the current
working directory, where 'xyz' corresponds to the three letter code
assigned to `db`. This data0 file is automatically converted into a
machine-readable file called data1 by software called EQPT. This
data1 file is then used in speciation calculations.
extra_eqpt_output : bool, default False
Keep additional output files created by EQPT (see `custom_db`)?
Ignored if `custom_db` is False.
batch_3o_filename : str, optional
Name of rds (R object) file exported after the speciation
calculation? No file will be generated if this argument is not
defined.
delete_generated_folders : bool, default False
Delete the 'rxn_3i', 'rxn_3o', and 'rxn_3p' folders containing raw
EQ3NR input, output, and pickup files once the speciation
calculation is complete?
custom_obigt : str, optional unless `get_affinity_energy` is True
Path of custom database csv used to generate a custom data0 file.
Needed for affinity and energy calculations.
Returns
-------
speciation : object of class Speciation
Contains the results of the speciation calculation.
"""
self.verbose = verbose
# check input sample file for errors
self._check_sample_input_file(input_filename, exclude, db, custom_db,
charge_balance_on, suppress_missing)
if aq_dist_type not in ["molality", "log_molality", "log_gamma", "log_activity"]:
self.err_handler.raise_exception("Unrecognized aq_dist_type. Valid "
"options are 'molality', 'log_molality', 'log_gamma', 'log_activity'")
if mineral_sat_type not in ["logQoverK", "affinity"]:
self.err_handler.raise_exception("Unrecognized mineral_sat_type. Valid "
"options are 'logQoverK' or 'affinity'")
if redox_type not in ["Eh", "pe", "logfO2", "Ah"]:
self.err_handler.raise_exception("Unrecognized redox_type. Valid "
"options are 'Eh', 'pe', 'logfO2', or 'Ah'")
if redox_flag == "O2(g)" or redox_flag == -3:
redox_flag = -3
elif redox_flag == "pe" or redox_flag == -2:
redox_flag = -2
elif redox_flag == "Eh" or redox_flag == -1:
redox_flag = -1
elif redox_flag == "logfO2" or redox_flag == 0:
redox_flag = 0
elif redox_flag == "redox aux" or redox_flag == 1:
redox_flag = 1
else:
self.err_handler.raise_exception("Unrecognized redox flag. Valid options are 'O2(g)'"
", 'pe', 'Eh', 'logfO2', 'redox aux'")
# handle batch_3o naming
if batch_3o_filename != None:
if ".rds" in batch_3o_filename[-4:]:
batch_3o_filename = batch_3o_filename
else:
batch_3o_filename = "batch_3o_{}.rds".format(db)
else:
batch_3o_filename = ro.r("NULL")
# custom obigt used for energy calculations (temporary fix to allow
# custom data to be imported into CHNOSZ for energy calculations)
# TODO: remove this and have code find custom data automatically. It's a tricky problem!
if isinstance(custom_obigt, str):
if os.path.exists(custom_obigt) and os.path.isfile(custom_obigt):
pass
else:
err = ("Could not find custom_obigt file {}.".format(custom_obigt))
self.err_handler.raise_exception(err)
else:
custom_obigt = ro.r("NULL")
if custom_db:
# EQ3/6 cannot handle spaces in the 'EQ36DA' path name.
if " " in os.getcwd():
msg = ("Error: the path to the custom database "
"cannot contain spaces. The current path "
"is: [ " + os.getcwd() + " ]. Remove or "
"replace spaces in folder names for this "
"feature. Example: [ " + os.getcwd().replace(" ", "-") + " ].")
self.err_handler.raise_exception(msg)
self.runeqpt(db, extra_eqpt_output)
os.environ['EQ36DA'] = os.getcwd()
data0_path = "data0." + db
else:
data0_path = self.eq36da + "/data0." + db
if os.path.exists(data0_path) and os.path.isfile(data0_path):
with open(data0_path) as data0:
data0_lines = data0.readlines()
start_index = [i+1 for i, s in enumerate(data0_lines) if s == 'temperatures\n']
end_index = [i for i, s in enumerate(data0_lines) if s == 'debye huckel a (adh)\n']
db_grids_unformatted = [i.split("pressures")[0] for i in data0_lines[start_index[0]:end_index[0]]]
db_grids = [" ".join(i.split()) for i in db_grids_unformatted if i != '']
grid_temp = db_grids[0] + " " + db_grids[1]
grid_press = db_grids[2] + " " + db_grids[3]
grid_temp = grid_temp.split(" ")
grid_press = grid_press.split(" ")
try:
water_model = data0_lines[1].split("model: ")[1] # extract water model from the second line of data0 file
water_model = water_model.replace("\n", "")
except:
water_model = "SUPCRT92"
# print("Water model could not be referenced from {}".format(data0_path)+""
# ". Defaulting to SUPCRT92 water model...")
if(water_model not in ["SUPCRT92", "IAPWS95", "DEW"]):
water_model = "SUPCRT92" # the default for EQ3/6
print("Water model given in {}".format(data0_path)+" was not "
"recognized. Defaulting to SUPCRT92 water model...")
else: # if a data0 file can't be found, assume default water model, 0-350 C and PSAT
water_model = "SUPCRT92"
grid_temp = ["0.0100", "50.0000", "100.0000", "150.0000",
"200.0000", "250.0000", "300.0000", "350.0000"]
grid_press = ["1.0000", "1.0000", "1.0132", "4.7572",
"15.5365", "39.7365", "85.8378", "165.2113"]
if get_affinity_energy:
if rxn_filename == None and self.affinity_energy_reactions_raw==None:
err = ("get_affinity_energy is set to True but a reaction "
"file is not specified or redox reactions have not yet "
"been generated with generate_redox_reactions()")
self.err_handler.raise_exception(err)
elif rxn_filename != None:
self.__file_exists(rxn_filename, '.txt')
self.affinity_energy_reactions_raw = pd.read_csv(rxn_filename, sep="\t", header=None, names=["col"+str(i) for i in range(1,50)])
load_rxn_file = True
else:
rxn_filename = self.affinity_energy_reactions_raw
load_rxn_file = False
else:
rxn_filename = ""
load_rxn_file=False
# handle Alter/Suppress options
# e.g. [["CaCl+", "AugmentLogK", -1], ["CaOH+", "Suppress"]]
alter_options_dict = {}
if len(alter_options) > 0:
for ao in alter_options:
key = ao[0]
if ao[1] == "Suppress" and len(ao) == 2:
ao += ["0"]
alter_options_dict[key] = convert_to_RVector(list(ao[1:]))
alter_options = ro.ListVector(alter_options_dict)
# preprocess for EQ3 using R scripts
self.__capture_r_output()
r_prescript = pkg_resources.resource_string(
__name__, 'preprocess_for_EQ3.r').decode("utf-8")
ro.r(r_prescript)
df_input_processed = ro.r.preprocess(input_filename=input_filename,
exclude=convert_to_RVector(
exclude),
redox_flag=redox_flag,
redox_aux=redox_aux,
default_logfO2=default_logfO2,
charge_balance_on=charge_balance_on,
suppress_missing=suppress_missing,
suppress=convert_to_RVector(
suppress),
alter_options=alter_options,
water_model=water_model,
grid_temp=convert_to_RVector(grid_temp),
grid_press=convert_to_RVector(grid_press),
get_solid_solutions=get_solid_solutions,
verbose=self.verbose)
for line in self.stderr: print(line)
self.df_input_processed = ro.conversion.rpy2py(df_input_processed)
# run EQ3 on each input file
cwd = os.getcwd()
self.__mk_check_del_directory('rxn_3o')
self.__mk_check_del_directory('rxn_3p')
files_3i, files_3i_paths = self.__read_inputs('3i', 'rxn_3i')
input_dir = cwd + "/rxn_3i/"
output_dir = cwd + "/rxn_3o/"
pickup_dir = cwd + "/rxn_3p/"
for file in files_3i:
samplename = self.df_input_processed.loc[file[:-3], "Sample"]
self.runeq3(filename_3i=file, db=db, samplename=samplename,
path_3i=input_dir, path_3o=output_dir,
path_3p=pickup_dir)
if custom_db:
os.environ['EQ36DA'] = self.eq36da
files_3o = [file+".3o" for file in self.df_input_processed.index]
df_input_processed_names = convert_to_RVector(list(self.df_input_processed.columns))
# mine output
self.__capture_r_output()
r_3o_mine = pkg_resources.resource_string(
__name__, '3o_mine.r').decode("utf-8")
ro.r(r_3o_mine)
batch_3o = ro.r.main_3o_mine(
files_3o=convert_to_RVector(files_3o),
input_filename=input_filename,
rxn_filename=rxn_filename,
get_aq_dist=get_aq_dist,
aq_dist_type=aq_dist_type,
get_mass_contribution=get_mass_contribution,
mass_contribution_other=mass_contribution_other,
get_mineral_sat=get_mineral_sat,
mineral_sat_type=mineral_sat_type,
get_redox=get_redox,
redox_type=redox_type,
get_charge_balance=get_charge_balance,
get_ion_activity_ratios=get_ion_activity_ratios,
get_fugacity=get_fugacity,
get_basis_totals=get_basis_totals,
get_solid_solutions=get_solid_solutions,
get_affinity_energy=get_affinity_energy,
load_rxn_file=load_rxn_file,
not_limiting=convert_to_RVector(not_limiting),
batch_3o_filename=batch_3o_filename,
df_input_processed=ro.conversion.py2rpy(self.df_input_processed),
# New rpy2 py2rpy2 conversion might not need the workaround below.
# The old note regarding deprecated pandas2ri is shown below...
# OLD NOTE:
# Needed for keeping symbols in column names after porting
# df_input_processed in the line above. Some kind of check.names
# option for pandas2ri.py2ri would be nice. Workaround:
df_input_processed_names=df_input_processed_names,
custom_obigt=custom_obigt,
verbose=self.verbose,
)
for line in self.stderr: print(line)
if len(batch_3o) == 0:
self.err_handler.raise_exception("Could not compile a speciation report. This is "
"likely because errors occurred during "
"the speciation calculation.")
return
if get_mass_contribution:
mass_contribution = ro.conversion.rpy2py(batch_3o.rx2('mass_contribution'))
df_report = ro.conversion.rpy2py(batch_3o.rx2('report'))
#df_input = ro.conversion.rpy2py(batch_3o.rx2('input'))
report_divs = batch_3o.rx2('report_divs')
input_cols = list(report_divs.rx2('input'))
df_input = df_report[input_cols].copy()
# add a pressure column to df_input
df_input["Pressure_bar"] = pd.Series(dtype='float')
sample_data = batch_3o.rx2('sample_data')
for sample in sample_data:
df_input.loc[str(sample.rx2('name')[0]), "Pressure_bar"] = float(sample.rx2('pressure')[0])
report_divs[0] = convert_to_RVector(input_cols + ["Pressure_bar"])
# handle headers and subheaders of input section
headers = [col.split("_")[0] for col in list(df_input.columns)]
headers = ["pH" if header == "H+" else header for header in headers]
headers = [header+"_(input)" if header not in ["Temperature", "logfO2", "Pressure"]+exclude else header for header in headers]
report_divs[0] = convert_to_RVector(headers) # modify headers in the 'input' section, report_divs[0]
subheaders = [subheader[1] if len(subheader) > 1 else "" for subheader in [
col.split("_") for col in list(df_input.columns)]]
multicolumns = pd.MultiIndex.from_arrays(
[headers, subheaders], names=['Sample', ''])
df_input.columns = multicolumns
df_join = df_input
if get_aq_dist:
aq_distribution_cols = list(report_divs.rx2('aq_distribution'))
df_aq_distribution = df_report[aq_distribution_cols]
df_aq_distribution = df_aq_distribution.apply(pd.to_numeric, errors='coerce')
# create a pH column from H+
df_aq_distribution["pH"] = np.nan # pH values are assigned when sample data is assembled later
# handle headers of aq_distribution section
headers = df_aq_distribution.columns
subheaders = [aq_dist_type]*(len(headers)-1) # -1 because the last column will have subheader pH (see next line)
subheaders = subheaders + ["pH"]
multicolumns = pd.MultiIndex.from_arrays(
[headers, subheaders], names=['Sample', ''])
df_aq_distribution.columns = multicolumns
# ensure final pH column is included in report_divs aq_distribution section
aq_dist_indx = report_divs.names.index("aq_distribution")
report_divs[aq_dist_indx] = convert_to_RVector(list(headers))
df_join = df_join.join(df_aq_distribution)
if get_mineral_sat:
mineral_sat_cols = list(report_divs.rx2('mineral_sat'))
mineral_sat_cols = [col for col in mineral_sat_cols if col != "df"] # TO DO: why is df appearing in mineral sat cols and redox sections?
df_mineral_sat = df_report[mineral_sat_cols]
df_mineral_sat = df_mineral_sat.apply(pd.to_numeric, errors='coerce')
# handle headers of df_mineral_sat section
if mineral_sat_type == "affinity":
mineral_sat_unit = "affinity_kcal"
elif mineral_sat_type == "logQoverK":
mineral_sat_unit = "logQ/K"
else:
self.err_handler.raise_exception(
"mineral_sat_type must be either 'affinity' or 'logQoverK'")
headers = df_mineral_sat.columns
subheaders = [mineral_sat_unit]*len(headers)
multicolumns = pd.MultiIndex.from_arrays(
[headers, subheaders], names=['Sample', ''])
df_mineral_sat.columns = multicolumns
df_join = df_join.join(df_mineral_sat)
if get_redox:
redox_cols = list(report_divs.rx2('redox'))
redox_cols = [col for col in redox_cols if col != "df"] # TO DO: why is df appearing in mineral sat cols and redox sections?
df_redox = df_report[redox_cols]
df_redox = df_redox.apply(pd.to_numeric, errors='coerce')
# handle headers of df_redox section
if redox_type == "Eh":
redox_unit = "Eh_volts"
elif redox_type == "pe":
redox_unit = "pe"
elif redox_type == "logfO2":
redox_unit = "logfO2"
elif redox_type == "Ah":
redox_unit = "Ah_kcal"
else:
self.err_handler.raise_exception(
"redox_type must be either 'Eh', 'pe', 'logfO2', or 'Ah'")
headers = df_redox.columns
subheaders = [redox_unit]*len(headers)
multicolumns = pd.MultiIndex.from_arrays(
[headers, subheaders], names=['Sample', ''])
df_redox.columns = multicolumns
df_join = df_join.join(df_redox)
if get_charge_balance:
charge_balance_cols = list(report_divs.rx2('charge_balance'))
df_charge_balance = df_report[charge_balance_cols]
df_charge_balance = df_charge_balance.apply(pd.to_numeric, errors='coerce')
# handle headers of df_charge_balance section
headers = df_charge_balance.columns
subheaders = ["%"]*2 + ['eq/kg.H2O', 'molality'] + \
['eq/kg.H2O']*4 + ['molality']
multicolumns = pd.MultiIndex.from_arrays(
[headers, subheaders], names=['Sample', ''])
df_charge_balance.columns = multicolumns
df_join = df_join.join(df_charge_balance)
if get_ion_activity_ratios:
ion_activity_ratio_cols = list(report_divs.rx2('ion_activity_ratios'))
df_ion_activity_ratios = df_report[ion_activity_ratio_cols]
df_ion_activity_ratios = df_ion_activity_ratios.apply(pd.to_numeric, errors='coerce')
# handle headers of df_ion_activity_ratios section
headers = df_ion_activity_ratios.columns
subheaders = ["Log ion-H+ activity ratio"]*len(headers)
multicolumns = pd.MultiIndex.from_arrays(
[headers, subheaders], names=['Sample', ''])
df_ion_activity_ratios.columns = multicolumns
df_join = df_join.join(df_ion_activity_ratios)
if get_fugacity:
fugacity_cols = list(report_divs.rx2('fugacity'))
df_fugacity = df_report[fugacity_cols]
df_fugacity = df_fugacity.apply(pd.to_numeric, errors='coerce')
# handle headers of fugacity section
headers = df_fugacity.columns
subheaders = ["log_fugacity"]*len(headers)
multicolumns = pd.MultiIndex.from_arrays(
[headers, subheaders], names=['Sample', ''])
df_fugacity.columns = multicolumns
df_join = df_join.join(df_fugacity)
if get_basis_totals:
sc_cols = list(report_divs.rx2('basis_totals'))
df_sc = df_report[sc_cols]
df_sc = df_sc.apply(pd.to_numeric, errors='coerce')
# handle headers of basis_totals section
headers = df_sc.columns
subheaders = ["molality"]*(len(headers))
multicolumns = pd.MultiIndex.from_arrays(
[headers, subheaders], names=['Sample', ''])
df_sc.columns = multicolumns
df_join = df_join.join(df_sc)
if get_affinity_energy:
affinity_cols = list(report_divs.rx2('affinity'))
energy_cols = list(report_divs.rx2('energy'))
df_affinity = df_report[affinity_cols]
df_energy = df_report[energy_cols]
df_affinity = df_affinity.apply(pd.to_numeric, errors='coerce')
df_energy = df_energy.apply(pd.to_numeric, errors='coerce')
# handle headers of df_affinity section
headers = df_affinity.columns
subheaders = ['cal/mol e-']*len(headers)
multicolumns = pd.MultiIndex.from_arrays(
[headers, subheaders], names=['Sample', ''])
df_affinity.columns = multicolumns
# handle headers of df_energy section
headers = df_energy.columns
subheaders = ['cal/kg.H2O']*len(headers)
multicolumns = pd.MultiIndex.from_arrays(
[headers, subheaders], names=['Sample', ''])
df_energy.columns = multicolumns
df_join = df_join.join(df_affinity)
df_join = df_join.join(df_energy)
out_dict = {'sample_data': {},
'report': df_join,
'input': df_input, 'report_divs': report_divs}
if get_mass_contribution:
out_dict['mass_contribution'] = mass_contribution
sample_data = batch_3o.rx2('sample_data')
# assemble sample data
for i, sample in enumerate(sample_data):
dict_sample_data = {
"filename": str(sample.rx2('filename')[0]),
"name": str(sample.rx2('name')[0]),
"temperature": float(sample.rx2('temperature')[0]),
"pressure": float(sample.rx2('pressure')[0]),
"logact_H2O": float(sample.rx2('logact_H2O')[0]),
"H2O_density": float(sample.rx2('H2O_density')[0]),
"H2O_molality": float(sample.rx2('H2O_molality')[0]),
"H2O_log_molality": float(sample.rx2('H2O_log_molality')[0]),
}
if get_aq_dist:
sample_aq_dist = ro.conversion.rpy2py(sample.rx2('aq_distribution'))
sample_aq_dist = sample_aq_dist.apply(pd.to_numeric, errors='coerce')
sample_pH = -sample_aq_dist.loc["H+", "log_activity"]
out_dict["report"].loc[str(sample.rx2('name')[0]), "pH"] = sample_pH
dict_sample_data.update({"aq_distribution": sample_aq_dist})
if get_mass_contribution:
sample_mass_contribution = mass_contribution[mass_contribution["sample"] == sample.rx2('name')[0]]
dict_sample_data.update(
{"mass_contribution": sample_mass_contribution})
if get_mineral_sat:
dict_sample_data.update(
{"mineral_sat": ro.conversion.rpy2py(sample.rx2('mineral_sat')).apply(pd.to_numeric, errors='coerce')})
# replace sample mineral_sat entry with None if there is no mineral saturation data.
if(len(dict_sample_data['mineral_sat'].index) == 1 and dict_sample_data['mineral_sat'].index[0] == 'None'):
dict_sample_data['mineral_sat'] = None
if get_redox:
dict_sample_data.update(
{"redox": ro.conversion.rpy2py(sample.rx2('redox')).apply(pd.to_numeric, errors='coerce')})
if get_charge_balance:
dict_sample_data.update({"charge_balance": df_charge_balance.loc[sample.rx2('name')[0], :]})
if get_ion_activity_ratios:
try:
dict_sample_data.update(
{"ion_activity_ratios": ro.conversion.rpy2py(sample.rx2('ion_activity_ratios'))})
except:
dict_sample_data['ion_activity_ratios'] = None
if get_fugacity:
dict_sample_data.update(
{"fugacity": ro.conversion.rpy2py(sample.rx2('fugacity')).apply(pd.to_numeric, errors='coerce')})
# replace sample fugacity entry with None if there is no fugacity data.
if(len(dict_sample_data['fugacity'].index) == 1 and dict_sample_data['fugacity'].index[0] == 'None'):
dict_sample_data['fugacity'] = None
else:
dict_sample_data["fugacity"]["fugacity"] = 10**dict_sample_data["fugacity"]["log_fugacity"]
if get_basis_totals:
sc_dist = ro.conversion.rpy2py(sample.rx2('basis_totals'))
sc_dist = sc_dist.apply(pd.to_numeric, errors='coerce')
dict_sample_data.update({"basis_totals": sc_dist})
if get_solid_solutions:
sample_solid_solutions = batch_3o.rx2["sample_data"].rx2[sample.rx2('name')[0]].rx2["solid_solutions"]
if not type(sample_solid_solutions.names) == rpy2.rinterface_lib.sexp.NULLType:
ss_df_list = []
for ss in list(sample_solid_solutions.names):
df_ss_ideal = sample_solid_solutions.rx2[ss].rx2["ideal solution"]
df_ss_mineral = sample_solid_solutions.rx2[ss].rx2["mineral"]
df_merged = pd.merge(df_ss_mineral, df_ss_ideal, left_on='mineral', right_on='component', how='left')
df_merged.insert(0, 'solid solution', ss)
del df_merged['component']
ss_df_list.append(df_merged)
dict_sample_data.update(
{"solid_solutions": pd.concat(ss_df_list)})
if get_affinity_energy:
dict_sample_data.update({"affinity_energy_raw": ro.conversion.rpy2py(
sample.rx2('affinity_energy_raw'))})
dict_sample_data.update(
{"affinity_energy": ro.conversion.rpy2py(sample.rx2('affinity_energy'))})
out_dict["sample_data"].update(
{sample_data.names[i]: dict_sample_data})
out_dict.update({"batch_3o": batch_3o})
out_dict.update({"water_model":water_model, "grid_temp":grid_temp, "grid_press":grid_press})
speciation = Speciation(out_dict, hide_traceback=self.hide_traceback)
if get_affinity_energy:
speciation.half_cell_reactions = self.half_cell_reactions
speciation.affinity_energy_reactions_table = self.affinity_energy_reactions_table
speciation.affinity_energy_formatted_reactions = self.affinity_energy_formatted_reactions
speciation.show_redox_reactions = self.show_redox_reactions
if report_filename != None:
if ".csv" in report_filename[-4:]:
out_dict["report"].to_csv(report_filename)
else:
out_dict["report"].to_csv(report_filename+".csv")
if delete_generated_folders:
self._delete_rxn_folders()
if self.verbose > 0:
print("Finished!")
return speciation
def create_data0(self,
db,
filename,
filename_ss=None,
data0_formula_ox_name=None,
suppress_redox=[],
water_model="SUPCRT92",
exceed_Ttr=True,
grid_temps=[0.0100, 50.0000, 100.0000, 150.0000,
200.0000, 250.0000, 300.0000, 350.0000],
grid_press="Psat",
infer_formula_ox=False,
generate_template=True,
template_name=None,
template_type="strict",
verbose=1):
"""
Create a data0 file from a custom thermodynamic dataset.
Parameters
----------
db : str
Desired three letter code of data0 output.
filename : str
Name of csv file containing thermodynamic data in the OBIGT format.
filename_ss : str, optional
Name of file containing solid solution parameters.
data0_formula_ox_name : str, optional
Name of supplementary file containing data0 parameters and inferred
formula oxidation states. Ignored if `infer_formula_ox` is False.
See `infer_formula_ox` for more detail.
suppress_redox : list of str, default []
Suppress equilibrium between oxidation states of listed elements
(Cl, H, and O cannot be included).
water_model : str, default "SUPCRT92"
This is an experimental feature that is not yet fully supported.
Desired water model. Can be either "SUPCRT92", "IAPWS95", or "DEW".
These models are described here: http://chnosz.net/manual/water.html
exceed_Ttr : bool, default True
Calculate Gibbs energies of mineral phases and other species
beyond their transition temperatures?
grid_temps : list of eight float, default [0.0100, 50.0000, 100.0000, 150.0000, 200.0000, 250.0000, 300.0000, 350.0000]
Eight temperature values that make up the T-P grid.
grid_press : list of float, default "Psat"
Eight pressure values that make up the T-P grid. "Psat" for
calculations along the liquid-vapor saturation curve.
infer_formula_ox : bool, default False
Create a supplementary file containing data0 parameters and
inferred formula oxidation states? This option is useful for
creating as many entries in the formula_ox column when creating a
new supplementary file. Note that compounds like DySO4+ result in
blank entries in formula_ox because the redox states of two
elements, Dy and S, would have to be estimated together; S has many
oxidation states and Dy's oxidation states are not hard-coded.
generate_template : bool, default True
Generate a CSV sample input template customized to this data0?
Columns include 'Sample', 'Temperature', 'logfO2', and all strict
basis species.
template_name : str, optional
Name of the sample input template file generated. If no name is
supplied, defaults to 'sample_template_xyz.csv', where 'xyz' is
the three letter code given to `db`. Ignored if `generate_template`
is False.
template_type : str, either 'strict', 'all basis', or 'all species'
Determines which columns are written to the sample template.
- 'strict' includes strict basis species
- 'all basis' includes strict and auxiliary basis species
- 'all species' includes all species in the thermodynamic database
Ignored if `generate_template` is False.
verbose : int, 0, 1, or 2, default 1
Level determining how many messages are returned during a
calculation. 2 for all messages, 1 for errors or warnings only,
0 for silent.
"""
# Check that thermodynamic database input files exist and are formatted
# correctly.
self._check_database_file(filename)
if filename_ss != None:
self.__file_exists(filename_ss)
self.verbose = verbose
if self.verbose >= 1:
print("Creating data0.{}...".format(db), flush=True)
if len(grid_temps) > 8 or len(grid_temps) < 1:
self.err_handler.raise_exception("'grid_temps' must have eight values.")
if isinstance(grid_press, list):
if len(grid_press) > 8 or len(grid_press) < 1:
self.err_handler.raise_exception("'grid_press' must have eight values.")
if sum([T >= 10000 for T in grid_temps]):
self.err_handler.raise_exception("Grid temperatures must be below 10000 °C.")
if isinstance(grid_press, list):
if sum([P >= 10000 for P in grid_press]):
self.err_handler.raise_exception("Grid pressures must be below 10000 bars.")
if water_model == "SUPCRT92":
min_T = 0
max_T = 2250
min_P = 1
max_P = 30000
elif water_model == "IAPWS95":
min_T = 0
max_T = 1000
min_P = 1
max_P = 10000
elif water_model == "DEW":
min_T = 0
max_T = 1000
min_P = 1
max_P = 60000
else:
self.err_handler.raise_exception("The water model '{}' ".format(water_model)+"is not "
"recognized. Try 'SUPCRT92', 'IAPWS95', or 'DEW'.")
# check that T and P are above minimum values
if sum([T <= min_T for T in grid_temps]):
print("WARNING: one or more temperatures in 'grid_temps' is below "
"or equal to {} °C".format(min_T)+" and is outside the valid "
"temperature range for the {} water model.".format(water_model))
if isinstance(grid_press, list):
if sum([P < min_P for P in grid_press]):
print("WARNING: one or more pressures in 'grid_press' is below "
"{} bar".format(min_P)+", the minimum valid "
"pressure for the {} water model.".format(water_model))
# check that T and P are below maximum values
if sum([T > max_T for T in grid_temps]):
print("WARNING: one or more temperatures in 'grid_temps' is above "
"{} °C".format(max_T)+", the maximum valid "
"temperature for the {} water model.".format(water_model))
if isinstance(grid_press, list):
if sum([P > max_P for P in grid_press]):
print("WARNING: one or more pressures in 'grid_press' is above "
"{} bar".format(max_P)+", the maximum valid "
"pressure for the {} water model.".format(water_model))
if water_model != "SUPCRT92":
print("WARNING: water models other than SUPCRT92 are not yet fully supported.")
template = pkg_resources.resource_string(
__name__, 'data0.min').decode("utf-8")
grid_temps = convert_to_RVector(grid_temps)
grid_press = convert_to_RVector(grid_press)
suppress_redox = convert_to_RVector(suppress_redox)
if filename_ss == None:
filename_ss = ro.r("NULL")
if data0_formula_ox_name == None:
data0_formula_ox_name = ro.r("NULL")
if template_name == None:
template_name = "sample_template_{}.csv".format(db)
if template_type not in ['strict', 'all basis', 'all species']:
self.err_handler.raise_exception("template_type {} ".format(template_type)+"is not"
"recognized. Try 'strict', 'all basis', or 'all species'")
self.__capture_r_output()
r_create_data0 = pkg_resources.resource_string(
__name__, 'create_data0.r').decode("utf-8")
ro.r(r_create_data0)
ro.r.main_create_data0(filename=filename,
filename_ss=filename_ss,
grid_temps=grid_temps,
grid_press=grid_press,
db=db,
water_model=water_model,
template=template,
exceed_Ttr=exceed_Ttr,
data0_formula_ox_name=data0_formula_ox_name,
suppress_redox=suppress_redox,
infer_formula_ox=infer_formula_ox,
generate_template=generate_template,
template_name=template_name,
template_type=template_type,
verbose=self.verbose)
for line in self.stderr: print(line)
if self.verbose > 0:
print("Finished creating data0.{}.".format(db))
def make_redox_reactions(self, redox_pairs="all"):
"""
Generate an organized collection of redox reactions for calculating
chemical affinity and energy supply values during speciation.
Parameters
----------
redox_pairs : list of int or "all", default "all"
List of indices of half reactions in the half cell reaction table
to be combined when generating full redox reactions.
E.g. [0, 1, 4] will combine half reactions with indices 0, 1, and 4
in the table stored in the `half_cell_reactions` attribute of the
`AqEquil` class.
If "all", generate all possible redox reactions from available half
cell reactions.
Returns
----------
Output is stored in the `affinity_energy_reactions_raw` and
`affinity_energy_reactions_table` attributes of the `AqEquil` class.
"""
# reset all redox variables stored in the AqEquil class
self.affinity_energy_reactions_raw = None
self.affinity_energy_reactions_table = None
self.affinity_energy_formatted_reactions = None
if self.verbose != 0:
print("Generating redox reactions...")
err_msg = ("redox_pairs can either be 'all' or a list of integers "
"indicating the indices of half cell reactions in "
"the half_cell_reactions table that should be combined into "
"full redox reactions. For example, redox_pairs=[0, 1, 2, 6] "
"will combine half cell reactions with indices 0, 1, 2, and 6 in "
"the half_cell_reactions table. This table is an attribute in the "
"class AqEquil.")
if isinstance(redox_pairs, str):
if redox_pairs == "all":
redox_pairs = list(range(0, self.half_cell_reactions.shape[0]))
else:
self.err_handler.raise_exception(err_msg)
elif isinstance(redox_pairs, list):
if not all([isinstance(i, int) for i in redox_pairs]):
self.err_handler.raise_exception(err_msg)
else:
self.err_handler.raise_exception(err_msg)
self.redox_pairs = redox_pairs
df = self.half_cell_reactions.iloc[redox_pairs].reset_index(drop=True)
Oxidant_1 = df['Oxidant_1']
Oxidant_2 = df['Oxidant_2']
Oxidant_3 = df['Oxidant_3']
Reductant_1 = df['Reductant_1']
Reductant_2 = df['Reductant_2']
#CREATING A LIST OF ALL SPECIES AND THEIR ELEMENT DICTIONARIES
df_oxidants_and_reductants = self.half_cell_reactions.loc[:, "Oxidant_1":].values.tolist()
flat_list = flatten_list(df_oxidants_and_reductants)
oxidants_and_reductants = [sp for sp in list(dict.fromkeys(flat_list)) if str(sp) != 'nan']
elements = [list(parse_formula(sp).keys()) for sp in oxidants_and_reductants]
elements = flatten_list(elements)
elements = list(dict.fromkeys(elements))
# get a dictionary of species and their elemental makeup:
# {'HNO3': {'H': 1.0, 'N': 1.0, 'O': 3.0, '-': 0, 'Fe': 0, 'C': 0, 'S': 0, '+': 0}, ...}
element_dictionary = dict()
for sp in oxidants_and_reductants:
element_dictionary[sp] = parse_formula(sp)
for e in elements:
if element_dictionary[sp].get(e, 0) == 0:
element_dictionary[sp][e] = 0
df_reax = pd.DataFrame() # empty df of reactions
df_reax['rO_coeff'] = ''
df_reax['rO'] = ''
df_reax['rR_coeff'] = ''
df_reax['rR'] = ''
df_reax['pO_coeff'] = ''
df_reax['pO'] = ''
df_reax['pR_coeff'] = ''
df_reax['pR'] = ''
df_reax['Reaction'] = ''
index = 0
reaction = []
indices = np.arange(0, len(df['Oxidant_1']), 1).tolist()*2 # how to loop back through the redox pairs to not run into out-of-range index
rxn_num = 0 # counting unique reactions
rxn_list = [] # list of unique reactions
rxn_names = []
rxn_pairs = [] # list of paired half reactions
for i in range(0, len(df['Oxidant_1']),1): # length of redox pairs - columns
for n in range(0, len(df['Oxidant_1']), 1):
if Reductant_1[i] == Reductant_1[indices[i+n]] or Reductant_1[i] == Reductant_2[indices[i+n]]: # if both reductants are the same thing, skip
continue
if Oxidant_1[i] == Oxidant_1[indices[i+n]] or Oxidant_1[i] == Oxidant_2[indices[i+n]] or Oxidant_1[i] == Oxidant_3[indices[i+n]]:
continue
# if Oxidant_1[i] == 'H2O' and Reductant_1[indices[i+n]] == 'H2O': #suppress the splitting of water
# continue
else:
# GENERATING REACTIONS BETWEEN OXIDANT_1 AND REDUCTANT_1
reaction.append(Oxidant_1[i]+' \t ' + Reductant_1[indices[i+n]] + '=' + Reductant_1[i] + ' \t ' + Oxidant_1[indices[i+n]])
rxn_num+=1
rxn_list.append(rxn_num)
rxn_names.append('red_'+Oxidant_1[i]+'_'+Reductant_1[i]+'_ox_'+Reductant_1[indices[i+n]]+'_'+Oxidant_1[indices[i+n]])
df_reax.loc[index, 'rO'] = Oxidant_1[i]
df_reax.loc[index, 'rR'] = Reductant_1[indices[i+n]]
df_reax.loc[index, 'pR'] = Reductant_1[i]
df_reax.loc[index, 'pO'] = Oxidant_1[indices[i+n]]
rxn_pairs.append([i, indices[i+n]])
index+=1
# REACTIONS INVOLVING OTHER PH-DEPENDENT SPECIES
if pd.isnull(Oxidant_2[i]) != True:
reaction.append(Oxidant_2[i] + ' \t ' + Reductant_1[indices[i+n]] + '=' + Reductant_1[i] + ' \t ' + Oxidant_1[indices[i+n]])
rxn_list.append(rxn_num)
rxn_names.append('red_'+Oxidant_1[i]+'_'+Reductant_1[i]+'_ox_'+Reductant_1[indices[i+n]]+'_'+Oxidant_1[indices[i+n]])
df_reax.loc[index, 'rO'] = Oxidant_2[i]
df_reax.loc[index, 'rR'] = Reductant_1[indices[i+n]]
df_reax.loc[index, 'pR'] = Reductant_1[i]
df_reax.loc[index, 'pO'] = Oxidant_1[indices[i+n]]
rxn_pairs.append([i, indices[i+n]])
index +=1
if pd.isnull(Reductant_2[indices[i+n]]) != True:
reaction.append(Oxidant_2[i] + ' \t ' + Reductant_2[indices[i+n]] + '=' + Reductant_1[i] + ' \t ' + Oxidant_1[indices[i+n]])
rxn_list.append(rxn_num)
rxn_names.append('red_'+Oxidant_1[i]+'_'+Reductant_1[i]+'_ox_'+Reductant_1[indices[i+n]]+'_'+Oxidant_1[indices[i+n]])
df_reax.loc[index, 'rO'] = Oxidant_2[i]
df_reax.loc[index, 'rR'] = Reductant_2[indices[i+n]]
df_reax.loc[index, 'pR'] = Reductant_1[i]
df_reax.loc[index, 'pO'] = Oxidant_1[indices[i+n]]
rxn_pairs.append([i, indices[i+n]])
index +=1
if pd.isnull(Reductant_2[indices[i+n]]) != True:
reaction.append(Oxidant_1[i] + ' \t ' + Reductant_2[indices[i+n]] + '=' + Reductant_1[i] + ' \t ' + Oxidant_1[indices[i+n]])
rxn_list.append(rxn_num)
rxn_names.append('red_'+Oxidant_1[i]+'_'+Reductant_1[i]+'_ox_'+Reductant_1[indices[i+n]]+'_'+Oxidant_1[indices[i+n]])
df_reax.loc[index, 'rO'] = Oxidant_1[i]
df_reax.loc[index, 'rR'] = Reductant_2[indices[i+n]]
df_reax.loc[index, 'pR'] = Reductant_1[i]
df_reax.loc[index, 'pO'] = Oxidant_1[indices[i+n]]
rxn_pairs.append([i, indices[i+n]])
index +=1
if pd.isnull(Oxidant_3[i]) != True:
reaction.append(Oxidant_3[i] + ' \t ' + Reductant_1[indices[i+n]] + '=' + Reductant_1[i] + ' \t ' + Oxidant_1[indices[i+n]])
rxn_list.append(rxn_num)
rxn_names.append('red_'+Oxidant_1[i]+'_'+Reductant_1[i]+'_ox_'+Reductant_1[indices[i+n]]+'_'+Oxidant_1[indices[i+n]])
df_reax.loc[index, 'rO'] = Oxidant_3[i]
df_reax.loc[index, 'rR'] = Reductant_1[indices[i+n]]
df_reax.loc[index, 'pR'] = Reductant_1[i]
df_reax.loc[index, 'pO'] = Oxidant_1[indices[i+n]]
rxn_pairs.append([i, indices[i+n]])
index +=1
if pd.isnull(Reductant_2[indices[i+n]]) != True:
reaction.append(Oxidant_3[i] + ' \t ' + Reductant_2[indices[i+n]] + '=' + Reductant_1[i] + ' \t ' + Oxidant_1[indices[i+n]])
rxn_list.append(rxn_num)
rxn_names.append('red_'+Oxidant_1[i]+'_'+Reductant_1[i]+'_ox_'+Reductant_1[indices[i+n]]+'_'+Oxidant_1[indices[i+n]])
df_reax.loc[index, 'rO'] = Oxidant_3[i]
df_reax.loc[index, 'rR'] = Reductant_2[indices[i+n]]
df_reax.loc[index, 'pR'] = Reductant_1[i]
df_reax.loc[index, 'pO'] = Oxidant_1[indices[i+n]]
rxn_pairs.append([i, indices[i+n]])
index +=1
# rxn_pairs_orig = [[redox_pairs[pair[0]], redox_pairs[pair[1]]] for pair in rxn_pairs]
# self.rxn_pairs_main = np.unique(np.array(rxn_pairs_orig), axis=0).tolist()
df_reax['Reaction'] = rxn_list
df_reax['Names'] = rxn_names
df_reax['Temp_Pairs'] = rxn_pairs
# if there are no reactions, return nothing
if df_reax.shape[0] == 0:
incompatible_half_reactions = pd.Series(self.half_cell_reactions["Redox Couple"], index=redox_pairs).tolist()
redundant_reductant_or_oxidant = []
for col in ["Oxidant_1", "Oxidant_2", "Oxidant_3", "Reductant_1", "Reductant_2"]:
redox_col = pd.Series(self.half_cell_reactions[col], index=[0,1])
if redox_col.eq(redox_col[0]).all():
redundant_reductant_or_oxidant.append(redox_col[0])
err_no_rxns = ("Valid reactions could not be written between the half "
"reactions {} ".format(incompatible_half_reactions)+"because "
"{}".format(redundant_reductant_or_oxidant)+" is on both sides "
"of all reactions.")
print(err_no_rxns)
return
### BALANCING NON-O, H ELEMENTS
for r in range(0, len(df_reax['rO'])):
count = 0 #to restart the loop through the elements
temp_rO_coeff = [1] *4 #C, N, S, Fe
temp_rR_coeff = [1] *4 #C, N, S, Fe
temp_pO_coeff = [1] *4 #C, N, S, Fe
temp_pR_coeff = [1] *4 #C, N, S, Fe
for e in ['C','N','S','Fe']:
temp1 = int(element_dictionary[df_reax['rO'][r]][e]) #count for the element in the list for rO at index r
temp2 = int(element_dictionary[df_reax['pR'][r]][e])
temp3 = int(element_dictionary[df_reax['rR'][r]][e])
temp4 = int(element_dictionary[df_reax['pO'][r]][e])
if temp1 == temp2:
temp_rO_coeff[count] = 1
temp_pR_coeff[count] = 1
if temp1 != temp2:
if temp1 ==0:
temp_rO_coeff[count] = 1
if temp1 != 0:
temp_rO_coeff[count] = np.lcm(temp1,temp2)/temp1
if temp2 == 0:
temp_pR_coeff[count] = 1
if temp2 != 0:
temp_pR_coeff[count] = np.lcm(temp1,temp2)/temp2
if temp3 == temp4:
temp_rR_coeff[count] = 1
temp_pO_coeff[count] = 1
if temp4 != temp3:
if temp3 == 0:
temp_rR_coeff[count] = 1
if temp3 != 0:
temp_rR_coeff[count] = np.lcm(temp3,temp4)/temp3
if temp4 == 0.0:
temp_pO_coeff[count] = 1
if temp4 !=0.0:
temp_pO_coeff[count] = np.lcm(temp3,temp4)/temp4
count +=1
df_reax.loc[r, 'rO_coeff'] = -max(temp_rO_coeff)
df_reax.loc[r, 'rR_coeff'] = -max(temp_rR_coeff)
df_reax.loc[r, 'pR_coeff'] = max(temp_pR_coeff)
df_reax.loc[r, 'pO_coeff'] = max(temp_pO_coeff)
all_reax = df_reax.copy(deep=True)
all_reax['rO_2_coeff'] = ''
all_reax['rO_2'] = ''
all_reax['rO_3_coeff'] = ''
all_reax['rO_3'] = ''
all_reax['rR_2_coeff'] = ''
all_reax['rR_2'] = ''
### MAIN REACTION
for r in range(1, max(all_reax['Reaction']+1)): # each reaction number once, 1 to 305
if len(all_reax[all_reax['Reaction']==r].index.values) == 1: # if nothing to combine, skip
continue
else:
temp = all_reax[all_reax['Reaction']==r].index.values[0] #index of first instance of this reaction which has multiple subreactions
lst2 = []
all_reax.loc[temp-0.5] = all_reax.loc[temp] # replicating the row to build on
all_reax = all_reax.sort_index() # putting the replicated row above the first instance
for i in all_reax[all_reax['Reaction']==r].index.values[2:]: #all but the first instance in the reactions (since that's copied already)
if all_reax.loc[i, 'rO'] != all_reax.loc[temp, 'rO'] and all_reax.loc[i, 'rO'] not in lst2: # if rO is new (and not the same as the first)
lst2.append(all_reax.loc[i, 'rO']) # list unique rO besides the first
for l in range(0, len(lst2)): # looping through unique rO
temp2 = 'rO_'+str(2+int(l)) # adding 0 or 1 to the rO number
all_reax.loc[temp-0.5,str(temp2)] = lst2[l] # add the unique rO to rO_2 or 3
if all_reax.loc[i, 'rR'] != all_reax.loc[temp, 'rR']: # if rR is new
all_reax.loc[temp-0.5,'rR_2'] = all_reax.loc[i, 'rR'] # add it to rR_2
##CHANGING COEFFICIENTS
rO = all_reax.loc[temp-0.5,'rO'] #assigning easy variables
rO_2 = all_reax.loc[temp-0.5,'rO_2']
rO_3 = all_reax.loc[temp-0.5,'rO_3']
rR = all_reax.loc[temp-0.5,'rR']
rR_2 = all_reax.loc[temp-0.5,'rR_2']
rO_coeff = all_reax.loc[temp-0.5,'rO_coeff'] #these are empty at the moment
rO_2_coeff = all_reax.loc[temp-0.5,'rO_2_coeff']
rO_3_coeff = all_reax.loc[temp-0.5,'rO_3_coeff']
rR_2_coeff = all_reax.loc[temp-0.5,'rR_2_coeff']
rR_coeff = all_reax.loc[temp-0.5,'rR_coeff']
if rO_3 != '' and rO_2 != '': #if DIC is the oxidant
all_reax.loc[temp-0.5,'rO_coeff'] = rO_coeff/3 #this works fine
all_reax.loc[temp-0.5,'rO_2_coeff'] = rO_coeff/3
all_reax.loc[temp-0.5,'rO_3_coeff'] = rO_coeff/3
if rR_2 != '':
all_reax.loc[temp-0.5,'rR_coeff'] = rR_coeff/2 #this works fine
all_reax.loc[temp-0.5,'rR_2_coeff'] = rR_coeff/2
all_reax.loc[temp-0.4] = all_reax.loc[temp-0.5] #NEW ROW WITH ONLY rR - this works fine : line 4
all_reax.loc[temp-0.4, 'rR_2_coeff'] = 0
all_reax.loc[temp-0.4, 'rR_coeff'] = rR_coeff
all_reax.loc[temp-0.3] = all_reax.loc[temp-0.5] #NEW ROW WITH ONLY rR_2 - this works fine: line 5
all_reax.loc[temp-0.3, 'rR_2_coeff'] = rR_coeff
all_reax.loc[temp-0.3, 'rR_coeff'] = 0
#NEW ROWS WITH TWO DIC AND BOTH rR
all_reax.loc[temp-0.25] = all_reax.loc[temp-0.5] #eliminate CO2
all_reax.loc[temp-0.25, 'rO_coeff'] = 0
all_reax.loc[temp-0.25, 'rO_2_coeff'] = rO_coeff/2
all_reax.loc[temp-0.25, 'rO_3_coeff'] = rO_coeff/2
all_reax.loc[temp-0.24] = all_reax.loc[temp-0.5] #eliminate HCO3-
all_reax.loc[temp-0.24, 'rO_2_coeff'] = 0
all_reax.loc[temp-0.24, 'rO_coeff'] = rO_coeff/2
all_reax.loc[temp-0.24, 'rO_3_coeff'] = rO_coeff/2
all_reax.loc[temp-0.23] = all_reax.loc[temp-0.5] #eliminate CO3-2
all_reax.loc[temp-0.23, 'rO_3_coeff'] = 0
all_reax.loc[temp-0.23, 'rO_2_coeff'] = rO_coeff/2
all_reax.loc[temp-0.23, 'rO_coeff'] = rO_coeff/2
all_reax.loc[temp-0.2] = all_reax.loc[temp-0.4] #NEW ROW WITH ONLY rR ELIMINATING CO2: line 6
all_reax.loc[temp-0.2, 'rO_coeff'] = 0
all_reax.loc[temp-0.2, 'rO_2_coeff'] = rO_coeff/2
all_reax.loc[temp-0.2, 'rO_3_coeff'] = rO_coeff/2
all_reax.loc[temp-0.1] = all_reax.loc[temp-0.4] #NEW ROW WITH ONLY rR ELIMINATING HCO3-: line 7
all_reax.loc[temp-0.1, 'rO_coeff'] = rO_coeff/2
all_reax.loc[temp-0.1, 'rO_2_coeff'] = 0
all_reax.loc[temp-0.1, 'rO_3_coeff'] = rO_coeff/2
all_reax.loc[temp-0.05] = all_reax.loc[temp-0.4] #NEW ROW WITH ONLY rR ELIMINATING CO3-2: line 8
all_reax.loc[temp-0.05, 'rO_coeff'] = rO_coeff/2
all_reax.loc[temp-0.05, 'rO_2_coeff'] = rO_coeff/2
all_reax.loc[temp-0.05, 'rO_3_coeff'] = 0
all_reax.loc[temp-0.04] = all_reax.loc[temp-0.3] #NEW ROW WITH ONLY rR_2 ELIMINATING CO2: line 9
all_reax.loc[temp-0.04, 'rO_coeff'] = 0
all_reax.loc[temp-0.04, 'rO_2_coeff'] = rO_coeff/2
all_reax.loc[temp-0.04, 'rO_3_coeff'] = rO_coeff/2
all_reax.loc[temp-0.03] = all_reax.loc[temp-0.3] #NEW ROW WITH ONLY rR_2 ELIMINATING HCO3-: line 10
all_reax.loc[temp-0.03, 'rO_coeff'] = rO_coeff/2
all_reax.loc[temp-0.03, 'rO_2_coeff'] = 0
all_reax.loc[temp-0.03, 'rO_3_coeff'] = rO_coeff/2
all_reax.loc[temp-0.02] = all_reax.loc[temp-0.3] #NEW ROW WITH ONLY rR_2 ELIMINATING CO3-2: line 11
all_reax.loc[temp-0.02, 'rO_coeff'] = rO_coeff/2
all_reax.loc[temp-0.02, 'rO_2_coeff'] = rO_coeff/2
all_reax.loc[temp-0.02, 'rO_3_coeff'] = 0
all_reax.loc[temp-0.01] = all_reax.loc[temp-0.5] #NEW ROW WITH ONLY CO2 and both RR
all_reax.loc[temp-0.01, 'rO_coeff'] = rO_coeff
all_reax.loc[temp-0.01, 'rO_2_coeff'] = 0
all_reax.loc[temp-0.01, 'rO_3_coeff'] = 0
all_reax.loc[temp-0.009] = all_reax.loc[temp-0.5] #NEW ROW WITH ONLY HCO3- and both RR
all_reax.loc[temp-0.009, 'rO_coeff'] = 0 ###
all_reax.loc[temp-0.009, 'rO_2_coeff'] = rO_coeff
all_reax.loc[temp-0.009, 'rO_3_coeff'] = 0
all_reax.loc[temp-0.008] = all_reax.loc[temp-0.5] #NEW ROW WITH ONLY CO2 and both RR
all_reax.loc[temp-0.008, 'rO_coeff'] = 0
all_reax.loc[temp-0.008, 'rO_2_coeff'] = 0
all_reax.loc[temp-0.008, 'rO_3_coeff'] = rO_coeff
if rR_2 == '':
all_reax.loc[temp-0.2] = all_reax.loc[temp-0.5] #NEW ROW ELIMINATING CO2
all_reax.loc[temp-0.2, 'rO_coeff'] = 0
all_reax.loc[temp-0.2, 'rO_2_coeff'] = rO_coeff/2
all_reax.loc[temp-0.2, 'rO_3_coeff'] = rO_coeff/2
all_reax.loc[temp-0.1] = all_reax.loc[temp-0.5] #NEW ROW ELIMINATING HCO3-
all_reax.loc[temp-0.1, 'rO_coeff'] = rO_coeff/2
all_reax.loc[temp-0.1, 'rO_2_coeff'] = 0
all_reax.loc[temp-0.1, 'rO_3_coeff'] = rO_coeff/2
all_reax.loc[temp-0.05] = all_reax.loc[temp-0.5] #NEW ROW ELIMINATING CO3-2
all_reax.loc[temp-0.05, 'rO_coeff'] = rO_coeff/2
all_reax.loc[temp-0.05, 'rO_2_coeff'] = rO_coeff/2
all_reax.loc[temp-0.05, 'rO_3_coeff'] = 0
if rO_2 != '' and rO_3 == '': # IF THERE ARE TWO OXIDANT OPTIONS
all_reax.loc[temp-0.5,'rO_coeff'] = rO_coeff/2
all_reax.loc[temp-0.5,'rO_2_coeff'] = rO_coeff/2
if rR_2 != '': #IF THERE ARE TWO REDUCTANT OPTIONS
all_reax.loc[temp-0.5,'rR_coeff'] = rR_coeff/2
all_reax.loc[temp-0.5,'rR_2_coeff'] = rR_coeff/2
all_reax.loc[temp-0.4] = all_reax.loc[temp-0.5] #NEW ROW WITH ONLY rR
all_reax.loc[temp-0.4, 'rR_2_coeff'] = 0
all_reax.loc[temp-0.4, 'rR_coeff'] = rR_coeff
all_reax.loc[temp-0.3] = all_reax.loc[temp-0.5] #NEW ROW WITH ONLY rR_2
all_reax.loc[temp-0.3, 'rR_2_coeff'] = rR_coeff
all_reax.loc[temp-0.3, 'rR_coeff'] = 0
all_reax.loc[temp-0.2] = all_reax.loc[temp-0.5] #NEW ROW WITH ONLY rO_2
all_reax.loc[temp-0.2, 'rO_2_coeff'] = rO_coeff
all_reax.loc[temp-0.2, 'rO_coeff'] = 0
all_reax.loc[temp-0.1] = all_reax.loc[temp-0.5] #NEW ROW WITH ONLY rO
all_reax.loc[temp-0.1, 'rO_2_coeff'] = 0
all_reax.loc[temp-0.1, 'rO_coeff'] = rO_coeff
if rO_2 == '' and rO_3 == '' and rR_2 != '': # IF THERE IS ONLY ONE OXIDANT BUT TWO REDUCTANTS
all_reax.loc[temp-0.5,'rR_coeff'] = rR_coeff/2
all_reax.loc[temp-0.5,'rR_2_coeff'] = rR_coeff/2
all_reax = all_reax.sort_index().reset_index(drop=True)
pair_list = []
for i in range(0, len(all_reax['Temp_Pairs'])):
pair_list.append([redox_pairs[all_reax.loc[i, 'Temp_Pairs'][0]], redox_pairs[all_reax.loc[i, 'Temp_Pairs'][1]]])
all_reax['pairs'] = pair_list
reax = all_reax.copy(deep=True)
reax.drop('Temp_Pairs', axis=1, inplace=True)
reax.drop('pairs', axis=1, inplace=True)
reax.reset_index(drop=True, inplace=True)
for s in ['Fe', 'O', 'H','-','+']:
for i in range(0,len(reax['rO'])):
### assigning temporary values to each species
temp_rO_coeff = reax['rO_coeff'][i]
temp_rO = element_dictionary[reax['rO'][i]][s]
if reax['rO_2_coeff'][i] != '':
temp_rO_2_coeff = reax['rO_2_coeff'][i]
temp_rO_2 = element_dictionary[reax['rO_2'][i]][s]
else:
temp_rO_2_coeff = 0
temp_rO_2 = 0
if reax['rO_3_coeff'][i] != '':
temp_rO_3_coeff = reax['rO_3_coeff'][i]
temp_rO_3 = element_dictionary[reax['rO_3'][i]][s]
else:
temp_rO_3_coeff = 0
temp_rO_3 = 0
temp_rR_coeff = reax['rR_coeff'][i]
temp_rR = element_dictionary[reax['rR'][i]][s]
if reax['rR_2_coeff'][i] != '':
temp_rR_2_coeff = reax['rR_2_coeff'][i]
temp_rR_2 = element_dictionary[reax['rR_2'][i]][s]
else:
temp_rR_2_coeff = 0
temp_rR_2 = 0
temp_pO_coeff = reax['pO_coeff'][i]
temp_pO = element_dictionary[reax['pO'][i]][s]
temp_pR_coeff = reax['pR_coeff'][i]
temp_pR = element_dictionary[reax['pR'][i]][s]
r = 0-(temp_rO*temp_rO_coeff+temp_pR*temp_pR_coeff+temp_rO_2*temp_rO_2_coeff+temp_rO_3*temp_rO_3_coeff)
o = 0-(temp_rR*temp_rR_coeff+temp_rR_2*temp_rR_2_coeff+temp_pO*temp_pO_coeff)
reax.loc[i, 'r_'+s] = r
reax.loc[i, 'o_'+s] = o
reax['r_H'] = reax['r_H'] - 2*reax['r_O']
reax['o_H'] = reax['o_H'] - 2*reax['o_O']
reax['r_+'] = reax['r_+'] - 2*reax['r_Fe'] - reax['r_H']
reax['o_+'] = reax['o_+'] - 2*reax['o_Fe'] - reax['o_H']
reax['r_e-'] = reax['r_+'] - reax['r_-']
reax['o_e-'] = reax['o_+'] - reax['o_-']
reax.rename({'r_Fe': 'r_Fe+2', 'r_O': 'r_H2O', 'r_H': 'r_H+', 'o_Fe': 'o_Fe+2', 'o_O': 'o_H2O', 'o_H': 'o_H+'}, axis=1, inplace = True)
### MULTIPLYING SUB-REACTIONS
lcm_charge = []
electrons = []
for i in range(0, len(reax['rO'])):
# # for i in range(0, 10):
lcm_charge = np.lcm(round(reax['r_e-'][i]), round(reax['o_e-'][i]))
electrons.append(str(lcm_charge)+'e')
r_multiplier = abs(lcm_charge/int(reax['r_e-'][i]))
o_multiplier = abs(lcm_charge/int(reax['o_e-'][i]))
for r in ['rO_coeff', 'pR_coeff', 'r_H2O', 'r_Fe+2', 'r_H+']:
reax.loc[i, r] = reax.loc[i, r]*r_multiplier
for o in ['rR_coeff', 'pO_coeff', 'o_H2O', 'o_Fe+2', 'o_H+']:
reax.loc[i, o] = reax.loc[i, o]*o_multiplier
if reax.loc[i, 'rO_2_coeff'] != '':
reax.loc[i, 'rO_2_coeff'] = reax.loc[i, 'rO_2_coeff']*r_multiplier
if reax.loc[i, 'rO_3_coeff'] != '':
reax.loc[i, 'rO_3_coeff'] = reax.loc[i, 'rO_3_coeff']*r_multiplier
if reax.loc[i, 'rR_2_coeff'] != '':
reax.loc[i, 'rR_2_coeff'] = reax.loc[i, 'rR_2_coeff']*o_multiplier
reax['H+'] = reax['r_H+'] + reax['o_H+']
reax['protons'] = 'H+'
reax['H2O'] = reax['r_H2O'] + reax['o_H2O']
reax['water'] = 'H2O'
reax['Fe+2'] = reax['r_Fe+2'] + reax['o_Fe+2']
reax['iron'] = 'Fe+2'
reax.drop(columns = ['r_Fe+2', 'r_H2O', 'r_H+', 'r_-', 'r_+', 'r_e-', 'o_Fe+2', 'o_H2O', 'o_H+', 'o_-', 'o_+', 'o_e-'], axis=1, inplace=True)
old_new_dict = {'SO4-2':'SO4-2', 'HSO4-':'HSO4-','FeS2':'pyrite',
'H2S':'H2S', 'H2O':'H2O', 'O2':'O2', 'H2':'H2',
'Fe2O3':'hematite','Fe3O4':'magnetite', 'CO2':'CO2',
'CH4':'METHANE', 'HCO3-':'HCO3-','CO3-2':'CO3-2',
'NH3':'NH3', 'NH4+':'NH4+', 'NO2-':'NO2-', 'HNO2':'HNO2',
'NO3-':'NO3-', 'HNO3':'HNO3', 'S':'sulfur',
'FeOOH':'goethite', 'CO':'CO', 'H+':'H+', 'Fe+2':'Fe+2',
'HS-':'HS-'}
old_new_dict = {old:'start'+new+'end' for old,new in zip(old_new_dict.keys(), old_new_dict.values())}
real_reax = reax.replace(old_new_dict.keys(), old_new_dict.values())
count = 0
rxn_count = []
rxn_number = []
for i in real_reax['Reaction']:
if i not in rxn_count:
rxn_count.append(i)
rxn_number.append(real_reax['Names'][count])
name_count = 1
else:
rxn_number.append(real_reax['Names'][count]+ '_'+str(name_count)+'_sub')
name_count += 1
count += 1
real_reax.insert(0, 'Reaction Number', rxn_number)
# # real_reax.insert(1, 'electrons', '1e')
real_reax.insert(1, 'electrons', electrons)
lst3 = [] #list of reaction numbers
lst4 = [] #list of reactions with issues
for i in range(0, len(real_reax['Reaction'])):
if real_reax['Reaction'][i] not in lst3:
lst3.append(real_reax['Reaction'][i])
for j in lst3: #looping through reaction numbers
first_e = real_reax.loc[real_reax['Reaction'] == j]['electrons'].reset_index(drop=True)[0]
for k in real_reax.loc[real_reax['Reaction'] == j]['electrons'].reset_index(drop=True):
if k != first_e:
if j not in lst4:
lst4.append(j)
for l in lst4:
print(real_reax.loc[real_reax['Reaction'] == l])
real_reax.drop(labels='Reaction', axis=1, inplace = True)
real_reax.drop(columns = 'Names', inplace = True)
#real_reax.to_csv('template.txt', sep ='\t', header=False, index=False)
file = real_reax.to_csv(sep='\t', header=False, index=False, line_terminator='\n')
file = file.split("\n")
# Formatting species to preserve charges while using regex package
new2 = []
for i in old_new_dict.values():
i = i.replace('+','\+')
i = i.replace('-','\-')
new2.append(i)
newlines = []
for line in file:
line = line.strip()
for s in new2:
number = 0
match = re.findall('\W+\d+\.\d+\t'+s, line)
if len(match) > 1:
for m in match:
pos = re.findall('\t(\d+\.\d+)', m)
if len(pos) > 0:
number += float(pos[0])
neg = re.findall('-\d+\.\d+', m)
if len(neg) > 0:
number += float(neg[0])
line = line.replace(m,'')
line = line + '\t' + str(number) + '\t' + s
new_match = re.findall('\t0\.0\t'+s, line) + re.findall('\t0\t'+s, line) + re.findall('\t0\t'+s+'$', line)
if len(new_match) > 0:
for n in new_match:
line = line.replace(n, '')
line = re.sub('\t+', '\t', line)
line = line.replace('\t0.0\tH\t', '\t')
line = line.replace('\tH\t','\tH+\t')
line = line.replace('\\', '')
line = line.replace('start', '')
line = line.replace('end', '')
line = line.strip()
newlines.append(line)
self.affinity_energy_reactions_raw = "\n".join(newlines)
df_rxn = pd.DataFrame([x.split('\t') for x in self.affinity_energy_reactions_raw.split('\n')])
df_rxn.columns = df_rxn.columns.map(str)
df_rxn = df_rxn.rename(columns={"0": "reaction_name", "1": "mol_e-_transferred_per_mol_rxn"})
df_rxn.insert(1, 'redox_pairs', all_reax['pairs'])
df_rxn = df_rxn.set_index("reaction_name")
df_rxn = df_rxn[df_rxn['mol_e-_transferred_per_mol_rxn'].notna()]
self.affinity_energy_reactions_table = df_rxn
# rxn_pairs_all = []
# n = 0
# prev_was_sub = False
# for i, rxn in enumerate(self.affinity_energy_reactions_table.index.tolist()):
# if "_sub" not in rxn[-4:]:
# if prev_was_sub:
# n += 1
# prev_was_sub = False
# rxn_pairs_all.append(self.rxn_pairs_main[n])
# else:
# rxn_pairs_all.append(self.rxn_pairs_main[n])
# prev_was_sub = True
# self.rxn_pairs_all = rxn_pairs_all
# self.affinity_energy_reactions_table.insert(0, "redox_pairs", rxn_pairs_all)
prev_was_coeff = False
n = 1
for col in self.affinity_energy_reactions_table.iloc[:, 2:].columns:
if not prev_was_coeff:
new_col_name = "coeff_"+str(n)
prev_was_coeff = True
else:
new_col_name = "species_"+str(n)
prev_was_coeff = False
n += 1
self.affinity_energy_reactions_table = self.affinity_energy_reactions_table.rename(columns={col: new_col_name})
nonsub_reaction_names = [name for name in self.affinity_energy_reactions_table.index if "_sub" not in name[-4:]]
if self.verbose != 0:
print("{} redox reactions have been generated.".format(len(nonsub_reaction_names)))
def show_redox_reactions(self, formatted=True, charge_sign_at_end=False,
hide_subreactions=True, simplify=True,
show=True):
"""
Show a table of redox reactions generated with the function
`make_redox_reactions`.
Parameters
----------
formatted : bool, default True
Should reactions be formatted for html output?
charge_sign_at_end : bool, default False
Display charge with sign after the number (e.g. SO4 2-)? Ignored if
`formatted` is False.
hide_subreactions : bool, default True
Hide subreactions?
show : bool, default False
Show the table of reactions? Ignored if not run in a Jupyter
notebook.
Returns
----------
A pandas dataframe containing balanced redox reactions written in full.
"""
self.affinity_energy_formatted_reactions = copy.copy(self.affinity_energy_reactions_table.iloc[:, 0:1])
df = copy.copy(self.affinity_energy_reactions_table)
if simplify:
main_rxn_names = df.loc[[ind for ind in df.index if "_sub" not in ind[-4:]]].index
df = df.iloc[[i-1 for i in range(0, len(df.index)) if "_sub" not in df.index[i][-4:]]]
self.affinity_energy_formatted_reactions = copy.copy(df.iloc[:, 0:1])
reactions = []
for irow in range(0, df.shape[0]):
redox_pair = df.loc[df.index[irow], "redox_pairs"]
oxidant_1 = self.half_cell_reactions.loc[self.half_cell_reactions.index[redox_pair[0]], "Oxidant_1"]
oxidant_2 = self.half_cell_reactions.loc[self.half_cell_reactions.index[redox_pair[0]], "Oxidant_2"]
oxidant_3 = self.half_cell_reactions.loc[self.half_cell_reactions.index[redox_pair[0]], "Oxidant_3"]
reductant_1 = self.half_cell_reactions.loc[self.half_cell_reactions.index[redox_pair[1]], "Reductant_1"]
reductant_2 = self.half_cell_reactions.loc[self.half_cell_reactions.index[redox_pair[1]], "Reductant_2"]
oxidants = [ox for ox in [oxidant_1, oxidant_2, oxidant_3] if str(ox) != 'nan']
reductants = [rd for rd in [reductant_1, reductant_2] if str(rd) != 'nan']
if len(oxidants) > 1:
oxidant_sigma_needed = True
else:
oxidant_sigma_needed = False
if len(reductants) > 1:
reductant_sigma_needed = True
else:
reductant_sigma_needed = False
rxn_row = df.iloc[irow, 2:]
rxn = rxn_row[rxn_row.notna()]
coeffs = copy.copy(rxn[::2]).tolist()
names = copy.copy(rxn[1::2]).tolist()
# print(rxn.name)
if oxidant_sigma_needed or reductant_sigma_needed:
reactant_names = [names[i] for i in range(0, len(names)) if float(coeffs[i]) < 0]
for sp in reactant_names:
if sp in oxidants and oxidant_sigma_needed:
i = names.index(sp)
names[i] = u"\u03A3"+sp
if sp in reductants and reductant_sigma_needed:
i = names.index(sp)
names[i] = u"\u03A3"+sp
react_grid = pd.DataFrame({"coeff":coeffs, "name":names})
react_grid["coeff"] = pd.to_numeric(react_grid["coeff"])
react_grid = react_grid.astype({'coeff': 'float'})
reactants = " + ".join([(str(-int(react_grid["coeff"][i]) if react_grid["coeff"][i].is_integer() else -react_grid["coeff"][i])+" " if -react_grid["coeff"][i] != 1 else "") + react_grid["name"][i] for i in range(0, len(react_grid["name"])) if react_grid["coeff"][i] < 0])
products = " + ".join([(str(int(react_grid["coeff"][i]) if react_grid["coeff"][i].is_integer() else react_grid["coeff"][i])+" " if react_grid["coeff"][i] != 1 else "") + react_grid["name"][i] for i in range(0, len(react_grid["name"])) if react_grid["coeff"][i] > 0])
if formatted:
reactants = " + ".join([format_coeff(react_grid["coeff"][i]) + html_chemname_format_AqEquil(react_grid["name"][i], charge_sign_at_end=charge_sign_at_end) for i in range(0, len(react_grid["name"])) if react_grid["coeff"][i] < 0])
products = " + ".join([format_coeff(react_grid["coeff"][i]) + html_chemname_format_AqEquil(react_grid["name"][i], charge_sign_at_end=charge_sign_at_end) for i in range(0, len(react_grid["name"])) if react_grid["coeff"][i] > 0])
reaction = reactants + " = " + products
reactions.append(reaction)
self.affinity_energy_formatted_reactions["reaction"] = reactions[1:] + reactions[:1] # because reactions got rotated with respect to reaction names, rotate the other way
self.affinity_energy_formatted_reactions.index = main_rxn_names
else:
reactions = []
for irow in range(0, df.shape[0]):
redox_pair = df.loc[self.affinity_energy_reactions_table.index[irow], "redox_pairs"]
oxidant = redox_pair[0]
reductant = redox_pair[1]
rxn_row = df.iloc[irow, 2:]
rxn = rxn_row[rxn_row.notna()]
coeffs = copy.copy(rxn[::2]).tolist()
names = copy.copy(rxn[1::2]).tolist()
react_grid = pd.DataFrame({"coeff":coeffs, "name":names})
react_grid["coeff"] = pd.to_numeric(react_grid["coeff"])
react_grid = react_grid.astype({'coeff': 'float'})
reactants = " + ".join([(str(-int(react_grid["coeff"][i]) if react_grid["coeff"][i].is_integer() else -react_grid["coeff"][i])+" " if -react_grid["coeff"][i] != 1 else "") + react_grid["name"][i] for i in range(0, len(react_grid["name"])) if react_grid["coeff"][i] < 0])
products = " + ".join([(str(int(react_grid["coeff"][i]) if react_grid["coeff"][i].is_integer() else react_grid["coeff"][i])+" " if react_grid["coeff"][i] != 1 else "") + react_grid["name"][i] for i in range(0, len(react_grid["name"])) if react_grid["coeff"][i] > 0])
if formatted:
reactants = " + ".join([format_coeff(react_grid["coeff"][i]) + html_chemname_format_AqEquil(react_grid["name"][i], charge_sign_at_end=charge_sign_at_end) for i in range(0, len(react_grid["name"])) if react_grid["coeff"][i] < 0])
products = " + ".join([format_coeff(react_grid["coeff"][i]) + html_chemname_format_AqEquil(react_grid["name"][i], charge_sign_at_end=charge_sign_at_end) for i in range(0, len(react_grid["name"])) if react_grid["coeff"][i] > 0])
reaction = reactants + " = " + products
reactions.append(reaction)
self.affinity_energy_formatted_reactions["reaction"] = reactions
df_out = copy.copy(self.affinity_energy_formatted_reactions)
if hide_subreactions and not simplify:
df_out = self.affinity_energy_formatted_reactions.loc[[ind for ind in self.affinity_energy_formatted_reactions.index if "_sub" not in ind[-4:]]]
if isnotebook() and show:
display(HTML(df_out.to_html(escape=False)))
return df_out
def compare(*args):
"""
Combine two or more speciations into a single speciation object for
comparison. The speciation object returned by this function can produce
scatterplots, barplots, and mass contribution plots, and contains a report
that can be browsed with `lookup`. See documentation for the functions in
the `Speciation` class for more detail.
Parameters
----------
*args : two or more objects of class `Speciation` to compare
Returns
----------
An object of class `Speciation`.
"""
if all(["mass_contribution" in a.__dict__.keys() for a in args]):
allow_mass_contribution = True
mass_contribution_breaks = []
else:
allow_mass_contribution = False
for i,sp in enumerate(args):
if i == 0:
sp_total = copy.deepcopy(sp)
sp_total.sample_data = None
if allow_mass_contribution:
mass_contribution_breaks.append(0)
else:
sp_i = copy.deepcopy(sp)
if allow_mass_contribution:
mass_contribution_breaks.append(sp_total.report.shape[0])
sp_total.report = pd.concat([sp_total.report, sp_i.report], axis=0, sort=False)
sp_total.report.index = sp_total.report.index + ("_"+sp_total.report.groupby(level=0).cumcount().astype(str)).replace('_0','')
if allow_mass_contribution:
mass_contribution_breaks.append(len(sp_total.report.index))
mc_sample_names_with_suffixes = list(sp_total.report.index)
for i,sp in enumerate(args):
mc_i = copy.deepcopy(sp.mass_contribution)
new_sample_names = copy.copy(mc_sample_names_with_suffixes[mass_contribution_breaks[i]:mass_contribution_breaks[i+1]])
old_sample_names = list(args[i].sample_data.keys())
old_new_sample_name_dict = {old:new for old,new in zip(old_sample_names, new_sample_names)}
newsample = [old_new_sample_name_dict[old] for old in mc_i["sample"]]
mc_i["sample"] = newsample
if i == 0:
mc_total = mc_i
else:
mc_total = pd.concat([mc_total, mc_i], axis=0, sort=False)
sp_total.mass_contribution = mc_total
else:
def no_mass_contrib_message(*args, **kwargs):
print("Mass contributions cannot be compared between these speciations "
"because one or more lack mass contribution data.")
sp_total.plot_mass_contribution = no_mass_contrib_message
return sp_total
Functions
def compare(*args)-
Combine two or more speciations into a single speciation object for comparison. The speciation object returned by this function can produce scatterplots, barplots, and mass contribution plots, and contains a report that can be browsed with
lookup. See documentation for the functions in theSpeciationclass for more detail.Parameters
*args:twoormore objectsofclassSpeciationto compare()
Returns
An object of class
Speciation.Expand source code
def compare(*args): """ Combine two or more speciations into a single speciation object for comparison. The speciation object returned by this function can produce scatterplots, barplots, and mass contribution plots, and contains a report that can be browsed with `lookup`. See documentation for the functions in the `Speciation` class for more detail. Parameters ---------- *args : two or more objects of class `Speciation` to compare Returns ---------- An object of class `Speciation`. """ if all(["mass_contribution" in a.__dict__.keys() for a in args]): allow_mass_contribution = True mass_contribution_breaks = [] else: allow_mass_contribution = False for i,sp in enumerate(args): if i == 0: sp_total = copy.deepcopy(sp) sp_total.sample_data = None if allow_mass_contribution: mass_contribution_breaks.append(0) else: sp_i = copy.deepcopy(sp) if allow_mass_contribution: mass_contribution_breaks.append(sp_total.report.shape[0]) sp_total.report = pd.concat([sp_total.report, sp_i.report], axis=0, sort=False) sp_total.report.index = sp_total.report.index + ("_"+sp_total.report.groupby(level=0).cumcount().astype(str)).replace('_0','') if allow_mass_contribution: mass_contribution_breaks.append(len(sp_total.report.index)) mc_sample_names_with_suffixes = list(sp_total.report.index) for i,sp in enumerate(args): mc_i = copy.deepcopy(sp.mass_contribution) new_sample_names = copy.copy(mc_sample_names_with_suffixes[mass_contribution_breaks[i]:mass_contribution_breaks[i+1]]) old_sample_names = list(args[i].sample_data.keys()) old_new_sample_name_dict = {old:new for old,new in zip(old_sample_names, new_sample_names)} newsample = [old_new_sample_name_dict[old] for old in mc_i["sample"]] mc_i["sample"] = newsample if i == 0: mc_total = mc_i else: mc_total = pd.concat([mc_total, mc_i], axis=0, sort=False) sp_total.mass_contribution = mc_total else: def no_mass_contrib_message(*args, **kwargs): print("Mass contributions cannot be compared between these speciations " "because one or more lack mass contribution data.") sp_total.plot_mass_contribution = no_mass_contrib_message return sp_total def convert_to_RVector(value, force_Rvec=True)-
Convert a value or list into an R vector of the appropriate type.
Parameters
value:numericorstr,orlistofnumericorstr- Value to be converted.
force_Rvec:bool, defaultTrue- If
valueis not a list, force conversion into a R vector? False will return an int, float, or str if value is non-list. True will always return an R vector.
Returns
int, float, str,oran rpy2 R vector- A value or R vector of an appropriate data type.
Expand source code
def convert_to_RVector(value, force_Rvec=True): """ Convert a value or list into an R vector of the appropriate type. Parameters ---------- value : numeric or str, or list of numeric or str Value to be converted. force_Rvec : bool, default True If `value` is not a list, force conversion into a R vector? False will return an int, float, or str if value is non-list. True will always return an R vector. Returns ------- int, float, str, or an rpy2 R vector A value or R vector of an appropriate data type. """ if not isinstance(value, list) and not force_Rvec: return value elif not isinstance(value, list) and force_Rvec: value = [value] else: pass if all(isinstance(x, bool) for x in value): return ro.BoolVector(value) elif all(isinstance(x, int) for x in value): return ro.IntVector(value) elif all(isinstance(x, float) or isinstance(x, int) for x in value): return ro.FloatVector(value) else: return ro.StrVector([str(v) for v in value]) def flatten_list(_2d_list)-
Flatten a list of lists. Code from: https://stackabuse.com/python-how-to-flatten-list-of-lists/
Expand source code
def flatten_list(_2d_list): """ Flatten a list of lists. Code from: https://stackabuse.com/python-how-to-flatten-list-of-lists/ """ flat_list = [] # Iterate through the outer list for element in _2d_list: if type(element) is list: # If the element is of type list, iterate through the sublist for item in element: flat_list.append(item) else: flat_list.append(element) return flat_list def float_to_formatted_fraction(x, error=1e-06)-
Format a fraction for html.
Expand source code
def float_to_formatted_fraction(x, error=0.000001): """ Format a fraction for html. """ f = float_to_fraction(x, error=error) whole_number_float = int((f[0]-(f[0]%f[1]))/f[1]) remainder_tuple = (f[0]%f[1], f[1]) if remainder_tuple[0] == 0: return str(whole_number_float) else: if whole_number_float == 0: whole_number_float = "" return "{0}<sup>{1}</sup>⁄<sub>{2}</sub>".format(whole_number_float, remainder_tuple[0], remainder_tuple[1]) def float_to_fraction(x, error=1e-06)-
Convert a float into a fraction. Works with floats like 2.66666666. Solution from https://stackoverflow.com/a/5128558/8406195
Expand source code
def float_to_fraction (x, error=0.000001): """ Convert a float into a fraction. Works with floats like 2.66666666. Solution from https://stackoverflow.com/a/5128558/8406195 """ n = int(math.floor(x)) x -= n if x < error: return (n, 1) elif 1 - error < x: return (n+1, 1) # The lower fraction is 0/1 lower_n = 0 lower_d = 1 # The upper fraction is 1/1 upper_n = 1 upper_d = 1 while True: # The middle fraction is (lower_n + upper_n) / (lower_d + upper_d) middle_n = lower_n + upper_n middle_d = lower_d + upper_d # If x + error < middle if middle_d * (x + error) < middle_n: # middle is our new upper upper_n = middle_n upper_d = middle_d # Else If middle < x - error elif middle_n < (x - error) * middle_d: # middle is our new lower lower_n = middle_n lower_d = middle_d # Else middle is our best fraction else: return (n * middle_d + middle_n, middle_d) def format_coeff(coeff)-
Format a reaction coefficient for html.
Expand source code
def format_coeff(coeff): """ Format a reaction coefficient for html. """ if coeff == 1 or coeff == -1: coeff = "" elif coeff.is_integer() and coeff < 0: coeff = str(-int(coeff)) elif coeff.is_integer() and coeff > 0: coeff = str(int(coeff)) else: if coeff < 0: coeff = float_to_formatted_fraction(-coeff) else: coeff = float_to_formatted_fraction(coeff) if coeff != "": coeff = coeff + " " return coeff def get_colors(colormap, ncol, alpha=1.0, hide_traceback=True)-
Get a list of rgb values for a matplotlib colormap
Parameters
colormap:str, default"WORM"- Name of the colormap to color the scatterpoints. Accepts "WORM", "colorblind", or matplotlib colormaps. See https://matplotlib.org/stable/tutorials/colors/colormaps.html The "colorblind" colormap is referenced from Wong, B. Points of view: Color blindness. Nat Methods 8, 441 (2011). https://doi.org/10.1038/nmeth.1618
ncol:int- Number of colors to return in the list.
alpha:float, default1.0- An alpha value between 0.0 (transparent) and 1.0 (opaque).
Returns
colors:list- A list of rgb color tuples
Expand source code
def get_colors(colormap, ncol, alpha=1.0, hide_traceback=True): """ Get a list of rgb values for a matplotlib colormap Parameters ---------- colormap : str, default "WORM" Name of the colormap to color the scatterpoints. Accepts "WORM", "colorblind", or matplotlib colormaps. See https://matplotlib.org/stable/tutorials/colors/colormaps.html The "colorblind" colormap is referenced from Wong, B. Points of view: Color blindness. Nat Methods 8, 441 (2011). https://doi.org/10.1038/nmeth.1618 ncol : int Number of colors to return in the list. alpha : float, default 1.0 An alpha value between 0.0 (transparent) and 1.0 (opaque). Returns ------- colors : list A list of rgb color tuples """ err_handler = Error_Handler(clean=hide_traceback) qualitative_cmaps = ['Pastel1', 'Pastel2', 'Paired', 'Accent', 'Dark2', 'Set1', 'Set2', 'Set3', 'tab10', 'tab20', 'tab20b', 'tab20c'] if colormap == "colorblind": # colors from Wong B. 2011, https://doi.org/10.1038/nmeth.1618 colors = [(0, 0, 0, alpha), # black (230/255, 159/255, 0, alpha), # orange (86/255, 180/255, 233/255, alpha), # sky blue (0, 158/255, 115/255, alpha), # bluish green (240/255, 228/255, 66/255, alpha), # yellow (0, 114/255, 178/255, alpha), # blue (213/255, 94/255, 0, alpha), # vermillion (204/255, 121/255, 167/255, alpha)] # reddish purple if ncol <= len(colors): return colors[:ncol] else: print("Switching from 'colorblind' colormap to 'viridis' because there are {} variables to plot.".format(ncol)) colormap = "viridis" elif colormap == "WORM": colors = [(0, 0, 0, alpha), # black (22/255, 153/255, 211/255, alpha), # blue (232/255, 86/255, 66/255, alpha), # red (245/255, 171/255, 80/255, alpha), # orange (115/255, 108/255, 168/255, alpha), # purple (151/255, 208/255, 119/255, alpha), # green (47/255, 91/255, 124/255, alpha), # dark blue (119/255, 119/255, 119/255, alpha)] # gray if ncol <= len(colors): return colors[:ncol] else: print("Switching from 'WORM' colormap to 'viridis' because there are {} variables to plot.".format(ncol)) colormap = "viridis" if colormap in qualitative_cmaps: # handle qualitative (non-continuous) colormaps colors = [plt.cm.__getattribute__(colormap).colors[i] for i in range(ncol)] colors = [(c[0], c[1], c[2], alpha) for c in colors] else: # handle sequential (continuous) colormaps norm = matplotlib.colors.Normalize(vmin=0, vmax=ncol-1) try: cmap = cm.__getattribute__(colormap) except: valid_colormaps = [cmap for cmap in dir(cm) if "_" not in cmap and cmap not in ["LUTSIZE", "MutableMapping", "ScalarMappable", "functools", "datad", "revcmap"]] err_handler.raise_exception("'{}'".format(colormap)+" is not a recognized matplotlib colormap. " "Try one of these: {}".format(valid_colormaps)) m = cm.ScalarMappable(norm=norm, cmap=cmap) colors = [m.to_rgba(i) for i in range(ncol)] colors = [(c[0], c[1], c[2], alpha) for c in colors] return colors def html_chemname_format(name, charge_sign_at_end=False)-
Format a chemical formula to display subscripts and superscripts in HTML (e.g., Plotly plots) Example, "CH3COO-" becomes "CH3COO-"
Parameters
name:str- A chemical formula.
charge_sign_at_end:bool, defaultFalse- Display charge with sign after the number (e.g. SO4 2-)?
Returns
A formatted chemical formula string.
Expand source code
def html_chemname_format(name, charge_sign_at_end=False): """ Format a chemical formula to display subscripts and superscripts in HTML (e.g., Plotly plots) Example, "CH3COO-" becomes "CH<sub>3</sub>COO<sup>-</sup>" Parameters ---------- name : str A chemical formula. charge_sign_at_end : bool, default False Display charge with sign after the number (e.g. SO4 2-)? Returns ------- A formatted chemical formula string. """ p = re.compile(r'(?P<sp>[-+]\d*?$)') name = p.sub(r'<sup>\g<sp></sup>', name) charge = re.search(r'<.*$', name) name_no_charge = re.match(r'(?:(?!<|$).)*', name).group(0) mapping = {"0": "<sub>0</sub>", "1": "<sub>1</sub>", "2": "<sub>2</sub>", "3": "<sub>3</sub>", "4": "<sub>4</sub>", "5": "<sub>5</sub>", "6": "<sub>6</sub>", "7": "<sub>7</sub>", "8": "<sub>8</sub>", "9": "<sub>9</sub>", ".":"<sub>.</sub>"} name_no_charge_formatted = "".join([mapping.get(x) or x for x in list(name_no_charge)]) if charge != None: name = name_no_charge_formatted + charge.group(0) else: name = name_no_charge_formatted if charge_sign_at_end: if "<sup>-" in name: name = name.replace("<sup>-", "<sup>") name = name.replace("</sup>", "-</sup>") if "<sup>+" in name: name = name.replace("<sup>+", "<sup>") name = name.replace("</sup>", "+</sup>") return(name) def html_chemname_format_AqEquil(name, charge_sign_at_end=False)-
AqEquil-specific formatting of chemical names for html. Takes "_(input)" into account when formatting names.
Parameters
name:str- A chemical formula.
Returns
A formatted chemical formula string.
Expand source code
def html_chemname_format_AqEquil(name, charge_sign_at_end=False): """ AqEquil-specific formatting of chemical names for html. Takes "_(input)" into account when formatting names. Parameters ---------- name : str A chemical formula. Returns ------- A formatted chemical formula string. """ # format only the first part of the name if it has "_(input)" if len(name.split("_(input)"))==2: if name.split("_(input)")[1] == '': name = name.split("_(input)")[0] input_flag=True else: input_flag = False name = html_chemname_format(name, charge_sign_at_end=charge_sign_at_end) # add " (input)" to the end of the name if input_flag: name = name+" (input)" return(name) def isnotebook()-
Check if this code is running in a Jupyter notebook
Expand source code
def isnotebook(): """ Check if this code is running in a Jupyter notebook """ try: shell = get_ipython().__class__.__name__ if shell == 'ZMQInteractiveShell': return True # Jupyter notebook or qtconsole elif shell == 'TerminalInteractiveShell': return False # Terminal running IPython else: return False # Other type (?) except NameError: return False # Probably standard Python interpreter def load(filename, messages=True, hide_traceback=True)-
Load a speciation file.
Parameters
filename:str- Name of the speciation file.
Returns
An object of class
Speciation.Expand source code
def load(filename, messages=True, hide_traceback=True): """ Load a speciation file. Parameters ---------- filename : str Name of the speciation file. Returns ---------- An object of class `Speciation`. """ err_handler = Error_Handler(clean=hide_traceback) if len(filename) <= 12: print("Attempting to load "+str(filename)+".speciation ...") filename = filename+".speciation" if 'speciation' in filename[-11:]: if os.path.exists(filename) and os.path.isfile(filename): pass else: err = "Cannot locate input file {}/{}".format(os.getcwd(), filename) err_handler.raise_exception(err) else: err = ("Input file {}".format(filename) + " " "must be in {} format.".format(ext_dict[ext])) err_handler.raise_exception(err) if os.path.getsize(filename) > 0: with open(filename, 'rb') as handle: speciation = dill.load(handle) if messages: print("Loaded '{}'".format(filename)) return speciation else: msg = "Cannot open " + str(filename) + " because the file is empty." err_handler.raise_exception(msg)
Classes
class AqEquil (eq36da='/home/shock/EQ3_6v8.0a/db', eq36co='/home/shock/EQ3_6v8.0a/bin', hide_traceback=True)-
Class containing functions to speciate aqueous water chemistry data using existing or custom thermodynamic datasets.
Parameters
eq36da:str, defaults to path given by the environment variable EQ36DA- Path to directory where data1 files are stored.
eq36co:str, defaults to path given by the environment variable EQ36CO- Path to directory where EQ3 executables are stored.
Attributes
eq36da:str- Path to directory where data1 files are stored.
eq36co:str- Path to directory where EQ3 executables are stored.
df_input_processed:pd.DataFrame- Pandas dataframe containing user-supplied sample chemistry data that has
been processed by
speciate. half_cell_reactions:pd.DataFrame- Pandas dataframe containing half cell reactions that can be combined into redox reactions for calculating chemical affinity and energy supply values during speciation.
redox_pairs:listofint- List of indices of half reactions in the
half_cell_reactionstable to be combined when generating full redox reactions. affinity_energy_reactions_raw:str- A formatted TSV string of redox reactions for calculating chemical affinities and energy supplies during speciation.
affinity_energy_reactions_table:pd.DataFrame- A table of redox reactions for calculating chemical affinities and energy supplies during speciation.
affinity_energy_formatted_reactions:pd.DataFrame- A pandas dataframe containing balanced redox reactions written in full.
verbose:int, 0, 1,or2, default1- Level determining how many messages are returned during a calculation. 2 for all messages, 1 for errors or warnings only, 0 for silent.
Expand source code
class AqEquil: """ Class containing functions to speciate aqueous water chemistry data using existing or custom thermodynamic datasets. Parameters ---------- eq36da : str, defaults to path given by the environment variable EQ36DA Path to directory where data1 files are stored. eq36co : str, defaults to path given by the environment variable EQ36CO Path to directory where EQ3 executables are stored. Attributes ---------- eq36da : str Path to directory where data1 files are stored. eq36co : str Path to directory where EQ3 executables are stored. df_input_processed : pd.DataFrame Pandas dataframe containing user-supplied sample chemistry data that has been processed by `speciate`. half_cell_reactions : pd.DataFrame Pandas dataframe containing half cell reactions that can be combined into redox reactions for calculating chemical affinity and energy supply values during speciation. redox_pairs : list of int List of indices of half reactions in the `half_cell_reactions` table to be combined when generating full redox reactions. affinity_energy_reactions_raw : str A formatted TSV string of redox reactions for calculating chemical affinities and energy supplies during speciation. affinity_energy_reactions_table : pd.DataFrame A table of redox reactions for calculating chemical affinities and energy supplies during speciation. affinity_energy_formatted_reactions : pd.DataFrame A pandas dataframe containing balanced redox reactions written in full. verbose : int, 0, 1, or 2, default 1 Level determining how many messages are returned during a calculation. 2 for all messages, 1 for errors or warnings only, 0 for silent. """ def __init__(self, eq36da=os.environ.get('EQ36DA'), eq36co=os.environ.get('EQ36CO'), hide_traceback=True): self.eq36da = eq36da self.eq36co = eq36co self.df_input_processed = None half_rxn_data = pkg_resources.resource_stream(__name__, "half_cell_reactions.csv") self.half_cell_reactions = pd.read_csv(half_rxn_data) #define the input file (dataframe of redox pairs) self.redox_pairs = None self.affinity_energy_reactions_raw = None self.affinity_energy_reactions_table = None self.affinity_energy_formatted_reactions = None self.verbose = 1 self.hide_traceback = hide_traceback self.err_handler = Error_Handler(clean=self.hide_traceback) os.environ['EQ36DA'] = self.eq36da # set eq3 db directory os.environ['EQ36CO'] = self.eq36co # set eq3 .exe directory def __capture_r_output(self): """ Capture and create a list of R console messages """ # Record output # self.stdout = [] self.stderr = [] if not DEBUGGING_R: # Dummy functions # def add_to_stdout(line): self.stdout.append(line) def add_to_stderr(line): self.stderr.append(line) # Keep the old functions # self.stdout_orig = rpy2.rinterface_lib.callbacks.consolewrite_print self.stderr_orig = rpy2.rinterface_lib.callbacks.consolewrite_warnerror # Set the call backs # rpy2.rinterface_lib.callbacks.consolewrite_print = add_to_stdout rpy2.rinterface_lib.callbacks.consolewrite_warnerror = add_to_stderr def __file_exists(self, filename, ext='.csv'): """ Check that a file exists and that it has the correct extension. Returns True if so, raises exception if not. """ ext_dict = { ".csv" : "comma separated values (.csv)", ".txt" : "standard text (.txt)", ".rds" : "R Data (.rds)", } if ext in filename[-4:]: if os.path.exists(filename) and os.path.isfile(filename): return True else: err = "Cannot locate input file {}/{}".format(os.getcwd(), filename) self.err_handler.raise_exception(err) else: err = ("Input file {}".format(filename) + " " "must be in {} format.".format(ext_dict[ext])) self.err_handler.raise_exception(err) return False def _check_database_file(self, filename): """ Check for problems in the thermodynamic database CSV. """ # is the file a csv? self.__file_exists(filename) thermo_df = pd.read_csv(filename) # does this file have the proper headers? required_headers = ["name", "abbrv", "formula", "state", "ref1", "ref2", "date", "E_units", "G", "H", "S", "Cp", "V", "a1.a", "a2.b", "a3.c", "a4.d", "c1.e", "c2.f", "omega.lambda", "z.T", "azero", "neutral_ion_type", "dissrxn", "tag", "formula_ox"] missing_headers = [] for header in required_headers: if header not in thermo_df.columns: missing_headers.append(header) if len(missing_headers) > 0: msg = ("The thermodynamic database file '{}'".format(filename)+" " "is missing one or more required columns: " "{}".format(", ".join(missing_headers))+". " "Are these headers spelled correctly in the file?") self.err_handler.raise_exception(msg) # does Cl-, O2(g), and O2 exist in the file? required_species = ["Cl-", "O2", "O2(g)"] missing_species = [] for species in required_species: if species not in list(thermo_df["name"]): missing_species.append(species) if len(missing_species) > 0: msg = ("The thermodynamic database file '{}'".format(filename)+" " "is missing required species:" "{}".format(missing_species)+". Default thermodynamic values" " will be used.") warnings.warn(msg) return def _check_sample_input_file(self, input_filename, exclude, db, custom_db, charge_balance_on, suppress_missing): """ Check for problems in sample input file. """ # does the input file exist? Is it a CSV? if self.__file_exists(input_filename): df_in = pd.read_csv(input_filename, header=None) # no headers for now so colname dupes can be checked else: self.err_handler.raise_exception("_check_sample_input() error!") # are there any samples? if df_in.shape[0] <= 2: err_no_samples = ("The file {}".format(input_filename) + " " "must contain at least three rows: the " "first for column names, the second for column subheaders, " "followed by one or more rows for sample data.") self.err_handler.raise_exception(err_no_samples) err_list = [] # for appending errors found in the sample input file # get header list col_list = list(df_in.iloc[0, 1:]) # are there blank headers? if True in [isinstance(x, float) and x != x for x in col_list]: # isinstance(x, float) and x != x is a typesafe way to check for nan err_blank_header = ("One or more columns in the sample input " "file have blank headers. These might be empty columns. " "Only the first column may have a blank header. Remove any " "empty columns and/or give each header a name.") self.err_handler.raise_exception(err_blank_header) # are there duplicate headers? dupe_cols = list(set([x for x in col_list if col_list.count(x) > 1])) if len(dupe_cols) > 0: err_dupe_cols = ("Duplicate column names are not allowed. " "Duplicate column names were found for:\n" "{}".format(str(dupe_cols))) err_list.append(err_dupe_cols) df_in.columns = df_in.iloc[0] # set column names df_in = df_in.drop(df_in.index[0], axis=0) # drop column name row df_in_headercheck = copy.deepcopy(df_in.iloc[:,1:]) # drop first column. Deepcopy slice because drop() doesn't work well with unnamed columns. # drop excluded headers for exc in exclude: if exc == df_in.columns[0]: # skip if 'sample' column is excluded continue try: df_in_headercheck = df_in_headercheck.drop(exc, axis=1) # drop excluded columns except: err_bad_exclude = ("Could not exclude the header '{}'".format(exc)+". " "This header could not be found in {}".format(input_filename)+"") err_list.append(err_bad_exclude) # get row list row_list = list(df_in.iloc[1:, 0]) # are there blank rows? if True in [isinstance(x, float) and x != x for x in row_list]: # isinstance(x, float) and x != x is a typesafe way to check for nan err_blank_row = ("One or more rows in the sample input " "file have blank sample names. These might be empty rows. " "Remove any empty rows and/or give each sample a name. Sample " "names go in the first column.") self.err_handler.raise_exception(err_blank_row) # are there duplicate rows? dupe_rows = list(set([x for x in row_list if row_list.count(x) > 1])) if len(dupe_rows) > 0: err_dupe_rows = ("Duplicate sample names are not allowed. " "Duplicate sample names were found for:\n" "{}".format(str(dupe_rows))) err_list.append(err_dupe_rows) # are there any leading or trailing spaces in sample names? invalid_sample_names = [n for n in list(df_in.iloc[1:, 0]) if str(n[0])==" " or str(n[-1])==" "] if len(invalid_sample_names) > 0: err_sample_leading_trailing_spaces = ("The following sample names " "have leading or trailing spaces. Remove spaces and try again: " "{}".format(invalid_sample_names)) self.err_handler.raise_exception(err_sample_leading_trailing_spaces) # are column names valid entries in the database? if custom_db: data0_path = "data0." + db else: data0_path = self.eq36da + "/data0." + db if os.path.exists(data0_path) and os.path.isfile(data0_path): with open(data0_path) as data0: data0_lines = data0.readlines() start_index = [i+1 for i, s in enumerate(data0_lines) if '* species name' in s] end_index = [i-1 for i, s in enumerate(data0_lines) if 'elements' in s] db_species = [i.split()[0] for i in data0_lines[start_index[0]:end_index[0]]] if charge_balance_on == 'pH': err_charge_balance_on_pH = ("To balance charge on pH, use " "charge_balance_on='H+'") err_list.append(err_charge_balance_on_pH) elif charge_balance_on in ['Temperature', 'logfO2']: err_charge_balance_invalid_type = ("Cannot balance charge " "on {}.".format(charge_balance_on)) err_list.append(err_charge_balance_invalid_type) elif charge_balance_on != "none" and charge_balance_on not in list(set(df_in_headercheck.columns)): err_charge_balance_invalid_sp = ("The species chosen for charge balance" " '{}'".format(charge_balance_on)+"" " was not found among the headers of the sample input file.") err_list.append(err_charge_balance_invalid_sp) for species in list(dict.fromkeys(df_in_headercheck.columns)): if species not in db_species and species not in ['Temperature', 'logfO2', 'pH']: err_species_not_in_db = ("The species '{}'".format(species) + " " "was not found in {}".format(data0_path) + ". " "If the column contains data that should not be " "included in the speciation calculation, add the " "column name to the 'exclude' argument. Try " "help(AqEquil.AqEquil.speciate) " "for more information about 'exclude'.") err_list.append(err_species_not_in_db) elif species == 'pH': err_species_pH = ("Please rename the 'pH' column in " "the sample input file to 'H+' with the subheader " "unit 'pH'.") err_list.append(err_species_pH) else: err_no_data0 = ("Could not locate {}.".format(data0_path) + " " "Unable to determine if column headers included in " "{} ".format(input_filename) + "match entries for species " "in the requested thermodynamic database '{}'.".format(db)) err_list.append(err_no_data0) # are subheader units valid? subheaders = df_in_headercheck.iloc[0,] valid_subheaders = ["degC", "ppm", "ppb", "Suppressed", "Molality", "Molarity", "mg/L", "mg/kg.sol", "Alk., eq/kg.H2O", "Alk., eq/L", "Alk., eq/kg.sol", "Alk., mg/L CaCO3", "Alk., mg/L HCO3-", "Log activity", "Log act combo", "Log mean act", "pX", "pH", "pHCl", "pmH", "pmX", "Hetero. equil.", "Homo. equil.", "Make non-basis", "logfO2", "Mineral"] for i, subheader in enumerate(subheaders): if subheader not in valid_subheaders: err_valid_sub = ("The subheader '{}'".format(subheader) + " " "for the column '{}'".format(df_in_headercheck.columns[i]) + " " "is not recognized. Valid subheaders are {}".format(str(valid_subheaders)) + ". " "If the column {}".format(df_in_headercheck.columns[i]) + " " "contains data that is not meant for the " "speciation calculation, add the column name " "to the 'exclude' argument. Try help(AqEquil.AqEquil.speciate) " "for more information about 'exclude'.") err_list.append(err_valid_sub) # is a 'Temperature' column present? if "Temperature" not in df_in_headercheck.columns and "Temperature" not in exclude: err_temp = ("The column 'Temperature' was not found in the input file. " "Please include a column with 'Temperature' in the first row, " "'degC' in the second row, and a temperature value for each " "sample in degrees Celsius.") err_list.append(err_temp) # raise an exception that summarizes all errors found if len(err_list) > 0: errs = "\n\n*".join(err_list) errs = ("The input file {}".format(input_filename)+" encountered" " errors:\n\n*" + errs) self.err_handler.raise_exception(errs) return def __clear_eqpt_extra_output(self): """ Deletes all EQPT output except data1. """ if os.path.exists("eqpt_log.txt") and os.path.isfile("eqpt_log.txt"): os.remove("eqpt_log.txt") if os.path.exists("data1f.txt") and os.path.isfile("data1f.txt"): os.remove("data1f.txt") if os.path.exists("slist.txt") and os.path.isfile("slist.txt"): os.remove("slist.txt") def runeqpt(self, db, extra_eqpt_output=False): """ Convert a data0 into a data1 file with EQPT. Parameters ---------- db : str Three letter code of database. extra_eqpt_output : bool, default False Keep additional output files from EQPT? These files include eqpt_log.txt, data1f.txt, and slist.txt. """ if os.path.exists("data0."+db) and os.path.isfile("data0."+db): pass else: self.err_handler.raise_exception(" ".join(["Error: could not locate custom database", "data0.{} in {}.".format(db, os.getcwd())])) if os.path.exists("data1."+db) and os.path.isfile("data1."+db): os.remove("data1."+db) self.__clear_eqpt_extra_output() os.environ['EQ36DA'] = os.getcwd() args = ['/bin/csh', self.eq36co+'/runeqpt', db] try: self.__run_script_and_wait(args) # run EQPT except: os.environ['EQ36DA'] = self.eq36da self.err_handler.raise_exception( "Error: EQPT failed to run on {}.".format("data0."+db)) if os.path.exists("data1") and os.path.isfile("data1"): os.rename("data1", "data1."+db) if os.path.exists("output") and os.path.isfile("output"): os.rename("output", "eqpt_log.txt") if os.path.exists("data1f") and os.path.isfile("data1f"): os.rename("data1f", "data1f.txt") if os.path.exists("slist") and os.path.isfile("slist"): os.rename("slist", "slist.txt") if os.path.exists("data1."+db) and os.path.isfile("data1."+db): if self.verbose > 0: print("Successfully created a data1."+db+" from data0."+db) else: msg = ("EQPT could not create data1."+db+" from " "data0."+db+". Check eqpt_log.txt for details.") self.err_handler.raise_exception(msg) if not extra_eqpt_output: self.__clear_eqpt_extra_output() os.environ['EQ36DA'] = self.eq36da # reset default EQ36 db path def runeq3(self, filename_3i, db, samplename=None, path_3i=os.getcwd(), path_3o=os.getcwd(), path_3p=os.getcwd()): """ Call EQ3 on a .3i input file. Parameters ---------- filename_3i : str Name of 3i input file. db : str Three letter code of database. path_3i : path str, default current working directory Path of .3i input files. path_3o : path str, default current working directory Path of .3o output files. path_3p : path str, default current working directory Path of .3p pickup files. """ # get current working dir cwd = os.getcwd() if samplename == None: samplename = filename_3i[:-3] if self.verbose > 0: print('Using ' + db + ' to speciate ' + samplename) os.chdir(path_3i) # step into 3i folder args = ['/bin/csh', self.eq36co+'/runeq3', db, filename_3i] self.__run_script_and_wait(args) # run EQ3 # restore working dir os.chdir(cwd) filename_3o = filename_3i[:-1] + 'o' filename_3p = filename_3i[:-1] + 'p' try: # rename output os.rename(path_3i + '/output', path_3i + "/" + filename_3o) except: if self.verbose > 0: print('Error: EQ3 failed to produce output for ' + filename_3i) try: # move output shutil.move(path_3i + "/" + filename_3o, path_3o + "/" + filename_3o) except: if self.verbose > 0: print('Error: Could not move', filename_3o, "to", path_3o) try: # rename pickup os.rename(path_3i + '/pickup', path_3i + "/" + filename_3p) move_pickup = True except: if self.verbose > 0: print('Error: EQ3 failed to produce a pickup file for ' + filename_3i) move_pickup = False if move_pickup: try: # move pickup shutil.move(path_3i + "/" + filename_3p, path_3p + "/" + filename_3p) except: if self.verbose > 0: print('Error: Could not move', filename_3p, "to", path_3p) def runeq6(self, filename_6i, db, samplename=None, path_6i=os.getcwd(), path_6o=os.getcwd(), path_6p=os.getcwd()): """ Call EQ6 on a .6i input file. Parameters ---------- filename_6i : str Name of 6i input file. db : str Three letter code of database. path_6i : path str, default current working directory Path of .6i input files. path_6o : path str, default current working directory Path of .6o output files. path_6p : path str, default current working directory Path of .6p pickup files. """ # get current working dir cwd = os.getcwd() if samplename == None: samplename = filename_6i[:-3] if self.verbose > 0: print('Using ' + db + ' to speciate ' + samplename) os.chdir(path_6i) # step into 6i folder args = ['/bin/csh', self.eq36co+'/runeq6', db, filename_6i] self.__run_script_and_wait(args) # run EQ6 # restore working dir os.chdir(cwd) filename_6o = filename_6i[:-1] + 'o' filename_6p = filename_6i[:-1] + 'p' try: # rename output os.rename(path_6i + '/output', path_6i + "/" + filename_6o) except: if self.verbose > 0: print('Error: EQ6 failed to produce output for ' + filename_6i) try: # move output shutil.move(path_6i + "/" + filename_6o, path_6o + "/" + filename_6o) except: if self.verbose > 0: print('Error: Could not move', filename_6o, "to", path_6o) try: # rename pickup os.rename(path_6i + '/pickup', path_6i + "/" + filename_6p) move_pickup = True except: if self.verbose > 0: print('Error: EQ6 failed to produce a pickup file for ' + filename_6i) move_pickup = False if move_pickup: try: # move pickup shutil.move(path_6i + "/" + filename_6p, path_6p + "/" + filename_6p) except: if self.verbose > 0: print('Error: Could not move', filename_6p, "to", path_6p) def __mk_check_del_directory(self, path): """ Checks for the dir being created. If it is already present, delete it before recreating it. """ if not os.path.exists(path): os.makedirs(path) else: shutil.rmtree(path) os.makedirs(path) def __read_inputs(self, file_type, location): """ Finds all files of a filetype in all downstream folders. """ file_name = [] # file names file_list = [] # file names with paths for root, dirs, files in os.walk(location): for file in files: if file.endswith(file_type): if "-checkpoint" not in file: file_name.append(file) file_list.append(os.path.join(root, file)) return file_name, file_list def __run_script_and_wait(self, args): """ Runs shell commands. """ # DEVNULL and STDOUT needed to suppress all warnings subprocess.Popen(args, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT).wait() def _delete_rxn_folders(self): """ Deletes folders storing raw EQ3/6 input and output. """ if os.path.exists('rxn_3i') and os.path.isdir('rxn_3i'): shutil.rmtree('rxn_3i') if os.path.exists('rxn_3o') and os.path.isdir('rxn_3o'): shutil.rmtree('rxn_3o') if os.path.exists('rxn_3p') and os.path.isdir('rxn_3p'): shutil.rmtree('rxn_3p') if os.path.exists('rxn_6i') and os.path.isdir('rxn_6i'): shutil.rmtree('rxn_6i') if os.path.exists('rxn_6o') and os.path.isdir('rxn_6o'): shutil.rmtree('rxn_6o') if os.path.exists('rxn_6p') and os.path.isdir('rxn_6p'): shutil.rmtree('rxn_6p') def speciate(self, input_filename, db="wrm", redox_flag="logfO2", redox_aux="Fe+3", default_logfO2=-6, exclude=[], suppress=[], alter_options=[], charge_balance_on="none", suppress_missing=True, verbose=1, report_filename=None, get_aq_dist=True, aq_dist_type="log_activity", get_mass_contribution=True, mass_contribution_other=True, get_mineral_sat=True, mineral_sat_type="affinity", get_redox=True, redox_type="Eh", get_ion_activity_ratios=True, get_fugacity=True, get_basis_totals=True, get_solid_solutions=True, get_affinity_energy=False, rxn_filename=None, not_limiting=["H+", "OH-", "H2O"], get_charge_balance=True, custom_db=False, extra_eqpt_output=False, batch_3o_filename=None, delete_generated_folders=False, custom_obigt=None): """ Calculate the equilibrium distribution of chemical species in solution. Additionally, calculate chemical affinities and energy supplies for user-specified reactions. Parameters ---------- input_filename : str User-supplied utf8-encoded comma separated value (csv) file containing sample data intended for speciation. The file must follow this format: - the first row is a header row that must contain the names of the species to be included in the speciation calculation. There cannot be duplicate headers. - the second row must contain subheaders for each species in the header row. These subheaders must be taken from the following: degC ppm ppb Suppressed Molality Molarity mg/L mg/kg.sol Alk., eq/kg.H2O Alk., eq/L Alk., eq/kg.sol Alk., mg/L CaCO3 Alk., mg/L HCO3- Log activity Log act combo Log mean act pX pH pHCl pmH pmX Hetero. equil. Homo. equil. Make non-basis - 'Temperature' must be included as a header, with 'degC' as its subheader. - The first column must contain sample names. There cannot be duplicate sample names. db : three letter str, default "wrm" Three letter file extension for the desired thermodynamic database. If `custom_db` is False, this database must be named data1.xyz (where xyz is your desired three letter extension) and located in the EQ3/6 'EQ36DA' path. Otherwise, the database must be named data0.xyz and located in your current working directory. Note that data1 files are already compiled by EQPT, while data0 files will be automatically compiled for you if `custom_db` is True. redox_flag : str, default "O2(g)" Determines which column in the sample input file sets the overall redox state of the samples. Options for redox_flag include 'O2(g)', 'pe', 'Eh', 'logfO2', and 'redox aux'. The code will search your sample spreadsheet file (see `filename`) for a column corresponding to the option you chose: * 'O2(g)' with a valid subheader for a gas * 'pe' with subheader pe * 'Eh' with subheader volts * 'logfO2' with subheader logfO2 * 'redox aux' will search for a column corresponding to the auxilliary basis species selected to form a redox couple with its linked strict basis species (see `redox_aux`). For example, the redox couple Fe+2/Fe+3 would require a column named Fe+3 If an appropriate header or redox data cannot be found to define redox state, `default_logfO2` is used to set sample logfO2. There is a special case where dissolved oxygen can be used to impose sample redox state if `redox_flag` is set to logfO2 and a column named logfO2 does not appear in your sample spreadsheet. If there is a column corresponding to dissolved oxygen measurements, logfO2 is calculated from the equilibrium reaction O2(aq) = O2(g) at the temperature and pressure of the sample using the revised Helgeson- Kirkham-Flowers (HKF) equation of state (JC Tanger IV and HC Helgeson, Am. J. Sci., 1988, 288, 19). redox_aux : default "Fe+3", optional Ignored unless `redox_flag` equals 1. Name of the auxilliary species whose reaction links it to a basis species (or another auxilliary species) such that they form a redox couple that controls sample fO2. For instance, Fe+3 is linked to Fe+2 in many supporting data files, so selecting `redox_flag` = 1 and `redox_aux` = "Fe+3" will set sample fO2 based on the Fe+2/Fe+3 redox couple. default_logfO2 : float, default -6 Default value for sample logfO2 in case redox data cannot be found in the user-supplied sample spreadsheet. exclude : list of str, default [] Names of columns in the user-supplied sample spreadsheet that should not be considered aqueous species. Useful for excluding columns containing sample metatadata, such as "Year" and "Location". suppress : list of str, default [] Names of chemical species that will be prevented from forming in the speciation calculation. alter_options : list, default [] A list of lists, e.g., [["CaOH+", "Suppress"], ["CaCl+", "AugmentLogK", -1]] The first element of each interior list is the name of a species. The second element is an option to alter the species, and can be: - Suppress : suppress the formation of the species. (See also: `suppress`). - Replace : replace the species' log K value with a desired value. - AugmentLogK : augment the value of the species' log K. - AugmentG : augment the Gibbs free energy of the species by a desired value, in kcal/mol. The third element is a numeric value corresponding to the chosen option. A third element is not required for Suppress. charge_balance_on : str, default "none" If "none", will not balance electrical charge between cations and anions in the speciation calculation. If a name of a species is supplied instead, the activity of that species will be allowed to change until charge balance is obtained. For example, charge_balance_on = "H+" will calculate what pH a sample must have to have zero net charge. suppress_missing : bool, default True Suppress the formation of an aqueous species if it is missing a value in the user-supplied sample spreadsheet? verbose : int, 0, 1, or 2, default 1 Level determining how many messages are returned during a calculation. 2 for all messages, 1 for errors or warnings only, 0 for silent. report_filename : str, optional Name of the comma separated values (csv) report file generated when the calculation is complete. If this argument is not defined, a report file is not generated. get_aq_dist : bool, default True Calculate distributions of aqueous species? aq_dist_type : str, default "log_activity" Desired units of measurement for reported distributions of aqueous species. Can be "molality", "log_molality", "log_gamma", or "log_activity". Ignored if `get_aq_dist` is False. get_mass_contribution : bool, default True Calculate basis species contributions to mass balance of aqueous species? mass_contribution_other : bool, default True Include an "other" species for the sake of summing percents of basis species contributions to 100%? Ignored if `get_mass_contribution` is False. get_mineral_sat : bool, default True Calculate saturation states of pure solids? mineral_sat_type : str, default "affinity" Desired units of measurement for reported saturation states of pure solids. Can be "logQoverK" or "affinity". Ignored if `get_mineral_sat` is False. get_redox : bool, default True Calculate potentials of redox couples? redox_type : str, default "Eh" Desired units of measurement for reported redox potentials. Can be "Eh", "pe", "logfO2", or "Ah". Ignored if `get_redox` is False. get_ion_activity_ratios : bool, default True Calculate ion/H+ activity ratios and neutral species activities? get_fugacity : bool, default True Calculate gas fugacities? get_basis_totals : bool, default True Report total compositions of basis aqueous species? get_solid_solutions : bool, default True Permit the calculation of solid solutions and include them in the speciation report? get_affinity_energy : bool, default False Calculate affinities and energy supplies of reactions listed in a separate user-supplied file? rxn_filename : str, optional Name of .txt file containing reactions used to calculate affinities and energy supplies. Ignored if `get_affinity_energy` is False. not_limiting : list, default ["H+", "OH-", "H2O"] List containing names of species that are not considered limiting when calculating energy supplies. Ignored if `get_affinity_energy` is False. get_charge_balance : bool, default True Calculate charge balance and ionic strength? custom_db : bool, default False Is the database defined by `db` a custom user-supplied database? If this is set to True, searches for a data0.xyz file in the current working directory, where 'xyz' corresponds to the three letter code assigned to `db`. This data0 file is automatically converted into a machine-readable file called data1 by software called EQPT. This data1 file is then used in speciation calculations. extra_eqpt_output : bool, default False Keep additional output files created by EQPT (see `custom_db`)? Ignored if `custom_db` is False. batch_3o_filename : str, optional Name of rds (R object) file exported after the speciation calculation? No file will be generated if this argument is not defined. delete_generated_folders : bool, default False Delete the 'rxn_3i', 'rxn_3o', and 'rxn_3p' folders containing raw EQ3NR input, output, and pickup files once the speciation calculation is complete? custom_obigt : str, optional unless `get_affinity_energy` is True Path of custom database csv used to generate a custom data0 file. Needed for affinity and energy calculations. Returns ------- speciation : object of class Speciation Contains the results of the speciation calculation. """ self.verbose = verbose # check input sample file for errors self._check_sample_input_file(input_filename, exclude, db, custom_db, charge_balance_on, suppress_missing) if aq_dist_type not in ["molality", "log_molality", "log_gamma", "log_activity"]: self.err_handler.raise_exception("Unrecognized aq_dist_type. Valid " "options are 'molality', 'log_molality', 'log_gamma', 'log_activity'") if mineral_sat_type not in ["logQoverK", "affinity"]: self.err_handler.raise_exception("Unrecognized mineral_sat_type. Valid " "options are 'logQoverK' or 'affinity'") if redox_type not in ["Eh", "pe", "logfO2", "Ah"]: self.err_handler.raise_exception("Unrecognized redox_type. Valid " "options are 'Eh', 'pe', 'logfO2', or 'Ah'") if redox_flag == "O2(g)" or redox_flag == -3: redox_flag = -3 elif redox_flag == "pe" or redox_flag == -2: redox_flag = -2 elif redox_flag == "Eh" or redox_flag == -1: redox_flag = -1 elif redox_flag == "logfO2" or redox_flag == 0: redox_flag = 0 elif redox_flag == "redox aux" or redox_flag == 1: redox_flag = 1 else: self.err_handler.raise_exception("Unrecognized redox flag. Valid options are 'O2(g)'" ", 'pe', 'Eh', 'logfO2', 'redox aux'") # handle batch_3o naming if batch_3o_filename != None: if ".rds" in batch_3o_filename[-4:]: batch_3o_filename = batch_3o_filename else: batch_3o_filename = "batch_3o_{}.rds".format(db) else: batch_3o_filename = ro.r("NULL") # custom obigt used for energy calculations (temporary fix to allow # custom data to be imported into CHNOSZ for energy calculations) # TODO: remove this and have code find custom data automatically. It's a tricky problem! if isinstance(custom_obigt, str): if os.path.exists(custom_obigt) and os.path.isfile(custom_obigt): pass else: err = ("Could not find custom_obigt file {}.".format(custom_obigt)) self.err_handler.raise_exception(err) else: custom_obigt = ro.r("NULL") if custom_db: # EQ3/6 cannot handle spaces in the 'EQ36DA' path name. if " " in os.getcwd(): msg = ("Error: the path to the custom database " "cannot contain spaces. The current path " "is: [ " + os.getcwd() + " ]. Remove or " "replace spaces in folder names for this " "feature. Example: [ " + os.getcwd().replace(" ", "-") + " ].") self.err_handler.raise_exception(msg) self.runeqpt(db, extra_eqpt_output) os.environ['EQ36DA'] = os.getcwd() data0_path = "data0." + db else: data0_path = self.eq36da + "/data0." + db if os.path.exists(data0_path) and os.path.isfile(data0_path): with open(data0_path) as data0: data0_lines = data0.readlines() start_index = [i+1 for i, s in enumerate(data0_lines) if s == 'temperatures\n'] end_index = [i for i, s in enumerate(data0_lines) if s == 'debye huckel a (adh)\n'] db_grids_unformatted = [i.split("pressures")[0] for i in data0_lines[start_index[0]:end_index[0]]] db_grids = [" ".join(i.split()) for i in db_grids_unformatted if i != ''] grid_temp = db_grids[0] + " " + db_grids[1] grid_press = db_grids[2] + " " + db_grids[3] grid_temp = grid_temp.split(" ") grid_press = grid_press.split(" ") try: water_model = data0_lines[1].split("model: ")[1] # extract water model from the second line of data0 file water_model = water_model.replace("\n", "") except: water_model = "SUPCRT92" # print("Water model could not be referenced from {}".format(data0_path)+"" # ". Defaulting to SUPCRT92 water model...") if(water_model not in ["SUPCRT92", "IAPWS95", "DEW"]): water_model = "SUPCRT92" # the default for EQ3/6 print("Water model given in {}".format(data0_path)+" was not " "recognized. Defaulting to SUPCRT92 water model...") else: # if a data0 file can't be found, assume default water model, 0-350 C and PSAT water_model = "SUPCRT92" grid_temp = ["0.0100", "50.0000", "100.0000", "150.0000", "200.0000", "250.0000", "300.0000", "350.0000"] grid_press = ["1.0000", "1.0000", "1.0132", "4.7572", "15.5365", "39.7365", "85.8378", "165.2113"] if get_affinity_energy: if rxn_filename == None and self.affinity_energy_reactions_raw==None: err = ("get_affinity_energy is set to True but a reaction " "file is not specified or redox reactions have not yet " "been generated with generate_redox_reactions()") self.err_handler.raise_exception(err) elif rxn_filename != None: self.__file_exists(rxn_filename, '.txt') self.affinity_energy_reactions_raw = pd.read_csv(rxn_filename, sep="\t", header=None, names=["col"+str(i) for i in range(1,50)]) load_rxn_file = True else: rxn_filename = self.affinity_energy_reactions_raw load_rxn_file = False else: rxn_filename = "" load_rxn_file=False # handle Alter/Suppress options # e.g. [["CaCl+", "AugmentLogK", -1], ["CaOH+", "Suppress"]] alter_options_dict = {} if len(alter_options) > 0: for ao in alter_options: key = ao[0] if ao[1] == "Suppress" and len(ao) == 2: ao += ["0"] alter_options_dict[key] = convert_to_RVector(list(ao[1:])) alter_options = ro.ListVector(alter_options_dict) # preprocess for EQ3 using R scripts self.__capture_r_output() r_prescript = pkg_resources.resource_string( __name__, 'preprocess_for_EQ3.r').decode("utf-8") ro.r(r_prescript) df_input_processed = ro.r.preprocess(input_filename=input_filename, exclude=convert_to_RVector( exclude), redox_flag=redox_flag, redox_aux=redox_aux, default_logfO2=default_logfO2, charge_balance_on=charge_balance_on, suppress_missing=suppress_missing, suppress=convert_to_RVector( suppress), alter_options=alter_options, water_model=water_model, grid_temp=convert_to_RVector(grid_temp), grid_press=convert_to_RVector(grid_press), get_solid_solutions=get_solid_solutions, verbose=self.verbose) for line in self.stderr: print(line) self.df_input_processed = ro.conversion.rpy2py(df_input_processed) # run EQ3 on each input file cwd = os.getcwd() self.__mk_check_del_directory('rxn_3o') self.__mk_check_del_directory('rxn_3p') files_3i, files_3i_paths = self.__read_inputs('3i', 'rxn_3i') input_dir = cwd + "/rxn_3i/" output_dir = cwd + "/rxn_3o/" pickup_dir = cwd + "/rxn_3p/" for file in files_3i: samplename = self.df_input_processed.loc[file[:-3], "Sample"] self.runeq3(filename_3i=file, db=db, samplename=samplename, path_3i=input_dir, path_3o=output_dir, path_3p=pickup_dir) if custom_db: os.environ['EQ36DA'] = self.eq36da files_3o = [file+".3o" for file in self.df_input_processed.index] df_input_processed_names = convert_to_RVector(list(self.df_input_processed.columns)) # mine output self.__capture_r_output() r_3o_mine = pkg_resources.resource_string( __name__, '3o_mine.r').decode("utf-8") ro.r(r_3o_mine) batch_3o = ro.r.main_3o_mine( files_3o=convert_to_RVector(files_3o), input_filename=input_filename, rxn_filename=rxn_filename, get_aq_dist=get_aq_dist, aq_dist_type=aq_dist_type, get_mass_contribution=get_mass_contribution, mass_contribution_other=mass_contribution_other, get_mineral_sat=get_mineral_sat, mineral_sat_type=mineral_sat_type, get_redox=get_redox, redox_type=redox_type, get_charge_balance=get_charge_balance, get_ion_activity_ratios=get_ion_activity_ratios, get_fugacity=get_fugacity, get_basis_totals=get_basis_totals, get_solid_solutions=get_solid_solutions, get_affinity_energy=get_affinity_energy, load_rxn_file=load_rxn_file, not_limiting=convert_to_RVector(not_limiting), batch_3o_filename=batch_3o_filename, df_input_processed=ro.conversion.py2rpy(self.df_input_processed), # New rpy2 py2rpy2 conversion might not need the workaround below. # The old note regarding deprecated pandas2ri is shown below... # OLD NOTE: # Needed for keeping symbols in column names after porting # df_input_processed in the line above. Some kind of check.names # option for pandas2ri.py2ri would be nice. Workaround: df_input_processed_names=df_input_processed_names, custom_obigt=custom_obigt, verbose=self.verbose, ) for line in self.stderr: print(line) if len(batch_3o) == 0: self.err_handler.raise_exception("Could not compile a speciation report. This is " "likely because errors occurred during " "the speciation calculation.") return if get_mass_contribution: mass_contribution = ro.conversion.rpy2py(batch_3o.rx2('mass_contribution')) df_report = ro.conversion.rpy2py(batch_3o.rx2('report')) #df_input = ro.conversion.rpy2py(batch_3o.rx2('input')) report_divs = batch_3o.rx2('report_divs') input_cols = list(report_divs.rx2('input')) df_input = df_report[input_cols].copy() # add a pressure column to df_input df_input["Pressure_bar"] = pd.Series(dtype='float') sample_data = batch_3o.rx2('sample_data') for sample in sample_data: df_input.loc[str(sample.rx2('name')[0]), "Pressure_bar"] = float(sample.rx2('pressure')[0]) report_divs[0] = convert_to_RVector(input_cols + ["Pressure_bar"]) # handle headers and subheaders of input section headers = [col.split("_")[0] for col in list(df_input.columns)] headers = ["pH" if header == "H+" else header for header in headers] headers = [header+"_(input)" if header not in ["Temperature", "logfO2", "Pressure"]+exclude else header for header in headers] report_divs[0] = convert_to_RVector(headers) # modify headers in the 'input' section, report_divs[0] subheaders = [subheader[1] if len(subheader) > 1 else "" for subheader in [ col.split("_") for col in list(df_input.columns)]] multicolumns = pd.MultiIndex.from_arrays( [headers, subheaders], names=['Sample', '']) df_input.columns = multicolumns df_join = df_input if get_aq_dist: aq_distribution_cols = list(report_divs.rx2('aq_distribution')) df_aq_distribution = df_report[aq_distribution_cols] df_aq_distribution = df_aq_distribution.apply(pd.to_numeric, errors='coerce') # create a pH column from H+ df_aq_distribution["pH"] = np.nan # pH values are assigned when sample data is assembled later # handle headers of aq_distribution section headers = df_aq_distribution.columns subheaders = [aq_dist_type]*(len(headers)-1) # -1 because the last column will have subheader pH (see next line) subheaders = subheaders + ["pH"] multicolumns = pd.MultiIndex.from_arrays( [headers, subheaders], names=['Sample', '']) df_aq_distribution.columns = multicolumns # ensure final pH column is included in report_divs aq_distribution section aq_dist_indx = report_divs.names.index("aq_distribution") report_divs[aq_dist_indx] = convert_to_RVector(list(headers)) df_join = df_join.join(df_aq_distribution) if get_mineral_sat: mineral_sat_cols = list(report_divs.rx2('mineral_sat')) mineral_sat_cols = [col for col in mineral_sat_cols if col != "df"] # TO DO: why is df appearing in mineral sat cols and redox sections? df_mineral_sat = df_report[mineral_sat_cols] df_mineral_sat = df_mineral_sat.apply(pd.to_numeric, errors='coerce') # handle headers of df_mineral_sat section if mineral_sat_type == "affinity": mineral_sat_unit = "affinity_kcal" elif mineral_sat_type == "logQoverK": mineral_sat_unit = "logQ/K" else: self.err_handler.raise_exception( "mineral_sat_type must be either 'affinity' or 'logQoverK'") headers = df_mineral_sat.columns subheaders = [mineral_sat_unit]*len(headers) multicolumns = pd.MultiIndex.from_arrays( [headers, subheaders], names=['Sample', '']) df_mineral_sat.columns = multicolumns df_join = df_join.join(df_mineral_sat) if get_redox: redox_cols = list(report_divs.rx2('redox')) redox_cols = [col for col in redox_cols if col != "df"] # TO DO: why is df appearing in mineral sat cols and redox sections? df_redox = df_report[redox_cols] df_redox = df_redox.apply(pd.to_numeric, errors='coerce') # handle headers of df_redox section if redox_type == "Eh": redox_unit = "Eh_volts" elif redox_type == "pe": redox_unit = "pe" elif redox_type == "logfO2": redox_unit = "logfO2" elif redox_type == "Ah": redox_unit = "Ah_kcal" else: self.err_handler.raise_exception( "redox_type must be either 'Eh', 'pe', 'logfO2', or 'Ah'") headers = df_redox.columns subheaders = [redox_unit]*len(headers) multicolumns = pd.MultiIndex.from_arrays( [headers, subheaders], names=['Sample', '']) df_redox.columns = multicolumns df_join = df_join.join(df_redox) if get_charge_balance: charge_balance_cols = list(report_divs.rx2('charge_balance')) df_charge_balance = df_report[charge_balance_cols] df_charge_balance = df_charge_balance.apply(pd.to_numeric, errors='coerce') # handle headers of df_charge_balance section headers = df_charge_balance.columns subheaders = ["%"]*2 + ['eq/kg.H2O', 'molality'] + \ ['eq/kg.H2O']*4 + ['molality'] multicolumns = pd.MultiIndex.from_arrays( [headers, subheaders], names=['Sample', '']) df_charge_balance.columns = multicolumns df_join = df_join.join(df_charge_balance) if get_ion_activity_ratios: ion_activity_ratio_cols = list(report_divs.rx2('ion_activity_ratios')) df_ion_activity_ratios = df_report[ion_activity_ratio_cols] df_ion_activity_ratios = df_ion_activity_ratios.apply(pd.to_numeric, errors='coerce') # handle headers of df_ion_activity_ratios section headers = df_ion_activity_ratios.columns subheaders = ["Log ion-H+ activity ratio"]*len(headers) multicolumns = pd.MultiIndex.from_arrays( [headers, subheaders], names=['Sample', '']) df_ion_activity_ratios.columns = multicolumns df_join = df_join.join(df_ion_activity_ratios) if get_fugacity: fugacity_cols = list(report_divs.rx2('fugacity')) df_fugacity = df_report[fugacity_cols] df_fugacity = df_fugacity.apply(pd.to_numeric, errors='coerce') # handle headers of fugacity section headers = df_fugacity.columns subheaders = ["log_fugacity"]*len(headers) multicolumns = pd.MultiIndex.from_arrays( [headers, subheaders], names=['Sample', '']) df_fugacity.columns = multicolumns df_join = df_join.join(df_fugacity) if get_basis_totals: sc_cols = list(report_divs.rx2('basis_totals')) df_sc = df_report[sc_cols] df_sc = df_sc.apply(pd.to_numeric, errors='coerce') # handle headers of basis_totals section headers = df_sc.columns subheaders = ["molality"]*(len(headers)) multicolumns = pd.MultiIndex.from_arrays( [headers, subheaders], names=['Sample', '']) df_sc.columns = multicolumns df_join = df_join.join(df_sc) if get_affinity_energy: affinity_cols = list(report_divs.rx2('affinity')) energy_cols = list(report_divs.rx2('energy')) df_affinity = df_report[affinity_cols] df_energy = df_report[energy_cols] df_affinity = df_affinity.apply(pd.to_numeric, errors='coerce') df_energy = df_energy.apply(pd.to_numeric, errors='coerce') # handle headers of df_affinity section headers = df_affinity.columns subheaders = ['cal/mol e-']*len(headers) multicolumns = pd.MultiIndex.from_arrays( [headers, subheaders], names=['Sample', '']) df_affinity.columns = multicolumns # handle headers of df_energy section headers = df_energy.columns subheaders = ['cal/kg.H2O']*len(headers) multicolumns = pd.MultiIndex.from_arrays( [headers, subheaders], names=['Sample', '']) df_energy.columns = multicolumns df_join = df_join.join(df_affinity) df_join = df_join.join(df_energy) out_dict = {'sample_data': {}, 'report': df_join, 'input': df_input, 'report_divs': report_divs} if get_mass_contribution: out_dict['mass_contribution'] = mass_contribution sample_data = batch_3o.rx2('sample_data') # assemble sample data for i, sample in enumerate(sample_data): dict_sample_data = { "filename": str(sample.rx2('filename')[0]), "name": str(sample.rx2('name')[0]), "temperature": float(sample.rx2('temperature')[0]), "pressure": float(sample.rx2('pressure')[0]), "logact_H2O": float(sample.rx2('logact_H2O')[0]), "H2O_density": float(sample.rx2('H2O_density')[0]), "H2O_molality": float(sample.rx2('H2O_molality')[0]), "H2O_log_molality": float(sample.rx2('H2O_log_molality')[0]), } if get_aq_dist: sample_aq_dist = ro.conversion.rpy2py(sample.rx2('aq_distribution')) sample_aq_dist = sample_aq_dist.apply(pd.to_numeric, errors='coerce') sample_pH = -sample_aq_dist.loc["H+", "log_activity"] out_dict["report"].loc[str(sample.rx2('name')[0]), "pH"] = sample_pH dict_sample_data.update({"aq_distribution": sample_aq_dist}) if get_mass_contribution: sample_mass_contribution = mass_contribution[mass_contribution["sample"] == sample.rx2('name')[0]] dict_sample_data.update( {"mass_contribution": sample_mass_contribution}) if get_mineral_sat: dict_sample_data.update( {"mineral_sat": ro.conversion.rpy2py(sample.rx2('mineral_sat')).apply(pd.to_numeric, errors='coerce')}) # replace sample mineral_sat entry with None if there is no mineral saturation data. if(len(dict_sample_data['mineral_sat'].index) == 1 and dict_sample_data['mineral_sat'].index[0] == 'None'): dict_sample_data['mineral_sat'] = None if get_redox: dict_sample_data.update( {"redox": ro.conversion.rpy2py(sample.rx2('redox')).apply(pd.to_numeric, errors='coerce')}) if get_charge_balance: dict_sample_data.update({"charge_balance": df_charge_balance.loc[sample.rx2('name')[0], :]}) if get_ion_activity_ratios: try: dict_sample_data.update( {"ion_activity_ratios": ro.conversion.rpy2py(sample.rx2('ion_activity_ratios'))}) except: dict_sample_data['ion_activity_ratios'] = None if get_fugacity: dict_sample_data.update( {"fugacity": ro.conversion.rpy2py(sample.rx2('fugacity')).apply(pd.to_numeric, errors='coerce')}) # replace sample fugacity entry with None if there is no fugacity data. if(len(dict_sample_data['fugacity'].index) == 1 and dict_sample_data['fugacity'].index[0] == 'None'): dict_sample_data['fugacity'] = None else: dict_sample_data["fugacity"]["fugacity"] = 10**dict_sample_data["fugacity"]["log_fugacity"] if get_basis_totals: sc_dist = ro.conversion.rpy2py(sample.rx2('basis_totals')) sc_dist = sc_dist.apply(pd.to_numeric, errors='coerce') dict_sample_data.update({"basis_totals": sc_dist}) if get_solid_solutions: sample_solid_solutions = batch_3o.rx2["sample_data"].rx2[sample.rx2('name')[0]].rx2["solid_solutions"] if not type(sample_solid_solutions.names) == rpy2.rinterface_lib.sexp.NULLType: ss_df_list = [] for ss in list(sample_solid_solutions.names): df_ss_ideal = sample_solid_solutions.rx2[ss].rx2["ideal solution"] df_ss_mineral = sample_solid_solutions.rx2[ss].rx2["mineral"] df_merged = pd.merge(df_ss_mineral, df_ss_ideal, left_on='mineral', right_on='component', how='left') df_merged.insert(0, 'solid solution', ss) del df_merged['component'] ss_df_list.append(df_merged) dict_sample_data.update( {"solid_solutions": pd.concat(ss_df_list)}) if get_affinity_energy: dict_sample_data.update({"affinity_energy_raw": ro.conversion.rpy2py( sample.rx2('affinity_energy_raw'))}) dict_sample_data.update( {"affinity_energy": ro.conversion.rpy2py(sample.rx2('affinity_energy'))}) out_dict["sample_data"].update( {sample_data.names[i]: dict_sample_data}) out_dict.update({"batch_3o": batch_3o}) out_dict.update({"water_model":water_model, "grid_temp":grid_temp, "grid_press":grid_press}) speciation = Speciation(out_dict, hide_traceback=self.hide_traceback) if get_affinity_energy: speciation.half_cell_reactions = self.half_cell_reactions speciation.affinity_energy_reactions_table = self.affinity_energy_reactions_table speciation.affinity_energy_formatted_reactions = self.affinity_energy_formatted_reactions speciation.show_redox_reactions = self.show_redox_reactions if report_filename != None: if ".csv" in report_filename[-4:]: out_dict["report"].to_csv(report_filename) else: out_dict["report"].to_csv(report_filename+".csv") if delete_generated_folders: self._delete_rxn_folders() if self.verbose > 0: print("Finished!") return speciation def create_data0(self, db, filename, filename_ss=None, data0_formula_ox_name=None, suppress_redox=[], water_model="SUPCRT92", exceed_Ttr=True, grid_temps=[0.0100, 50.0000, 100.0000, 150.0000, 200.0000, 250.0000, 300.0000, 350.0000], grid_press="Psat", infer_formula_ox=False, generate_template=True, template_name=None, template_type="strict", verbose=1): """ Create a data0 file from a custom thermodynamic dataset. Parameters ---------- db : str Desired three letter code of data0 output. filename : str Name of csv file containing thermodynamic data in the OBIGT format. filename_ss : str, optional Name of file containing solid solution parameters. data0_formula_ox_name : str, optional Name of supplementary file containing data0 parameters and inferred formula oxidation states. Ignored if `infer_formula_ox` is False. See `infer_formula_ox` for more detail. suppress_redox : list of str, default [] Suppress equilibrium between oxidation states of listed elements (Cl, H, and O cannot be included). water_model : str, default "SUPCRT92" This is an experimental feature that is not yet fully supported. Desired water model. Can be either "SUPCRT92", "IAPWS95", or "DEW". These models are described here: http://chnosz.net/manual/water.html exceed_Ttr : bool, default True Calculate Gibbs energies of mineral phases and other species beyond their transition temperatures? grid_temps : list of eight float, default [0.0100, 50.0000, 100.0000, 150.0000, 200.0000, 250.0000, 300.0000, 350.0000] Eight temperature values that make up the T-P grid. grid_press : list of float, default "Psat" Eight pressure values that make up the T-P grid. "Psat" for calculations along the liquid-vapor saturation curve. infer_formula_ox : bool, default False Create a supplementary file containing data0 parameters and inferred formula oxidation states? This option is useful for creating as many entries in the formula_ox column when creating a new supplementary file. Note that compounds like DySO4+ result in blank entries in formula_ox because the redox states of two elements, Dy and S, would have to be estimated together; S has many oxidation states and Dy's oxidation states are not hard-coded. generate_template : bool, default True Generate a CSV sample input template customized to this data0? Columns include 'Sample', 'Temperature', 'logfO2', and all strict basis species. template_name : str, optional Name of the sample input template file generated. If no name is supplied, defaults to 'sample_template_xyz.csv', where 'xyz' is the three letter code given to `db`. Ignored if `generate_template` is False. template_type : str, either 'strict', 'all basis', or 'all species' Determines which columns are written to the sample template. - 'strict' includes strict basis species - 'all basis' includes strict and auxiliary basis species - 'all species' includes all species in the thermodynamic database Ignored if `generate_template` is False. verbose : int, 0, 1, or 2, default 1 Level determining how many messages are returned during a calculation. 2 for all messages, 1 for errors or warnings only, 0 for silent. """ # Check that thermodynamic database input files exist and are formatted # correctly. self._check_database_file(filename) if filename_ss != None: self.__file_exists(filename_ss) self.verbose = verbose if self.verbose >= 1: print("Creating data0.{}...".format(db), flush=True) if len(grid_temps) > 8 or len(grid_temps) < 1: self.err_handler.raise_exception("'grid_temps' must have eight values.") if isinstance(grid_press, list): if len(grid_press) > 8 or len(grid_press) < 1: self.err_handler.raise_exception("'grid_press' must have eight values.") if sum([T >= 10000 for T in grid_temps]): self.err_handler.raise_exception("Grid temperatures must be below 10000 °C.") if isinstance(grid_press, list): if sum([P >= 10000 for P in grid_press]): self.err_handler.raise_exception("Grid pressures must be below 10000 bars.") if water_model == "SUPCRT92": min_T = 0 max_T = 2250 min_P = 1 max_P = 30000 elif water_model == "IAPWS95": min_T = 0 max_T = 1000 min_P = 1 max_P = 10000 elif water_model == "DEW": min_T = 0 max_T = 1000 min_P = 1 max_P = 60000 else: self.err_handler.raise_exception("The water model '{}' ".format(water_model)+"is not " "recognized. Try 'SUPCRT92', 'IAPWS95', or 'DEW'.") # check that T and P are above minimum values if sum([T <= min_T for T in grid_temps]): print("WARNING: one or more temperatures in 'grid_temps' is below " "or equal to {} °C".format(min_T)+" and is outside the valid " "temperature range for the {} water model.".format(water_model)) if isinstance(grid_press, list): if sum([P < min_P for P in grid_press]): print("WARNING: one or more pressures in 'grid_press' is below " "{} bar".format(min_P)+", the minimum valid " "pressure for the {} water model.".format(water_model)) # check that T and P are below maximum values if sum([T > max_T for T in grid_temps]): print("WARNING: one or more temperatures in 'grid_temps' is above " "{} °C".format(max_T)+", the maximum valid " "temperature for the {} water model.".format(water_model)) if isinstance(grid_press, list): if sum([P > max_P for P in grid_press]): print("WARNING: one or more pressures in 'grid_press' is above " "{} bar".format(max_P)+", the maximum valid " "pressure for the {} water model.".format(water_model)) if water_model != "SUPCRT92": print("WARNING: water models other than SUPCRT92 are not yet fully supported.") template = pkg_resources.resource_string( __name__, 'data0.min').decode("utf-8") grid_temps = convert_to_RVector(grid_temps) grid_press = convert_to_RVector(grid_press) suppress_redox = convert_to_RVector(suppress_redox) if filename_ss == None: filename_ss = ro.r("NULL") if data0_formula_ox_name == None: data0_formula_ox_name = ro.r("NULL") if template_name == None: template_name = "sample_template_{}.csv".format(db) if template_type not in ['strict', 'all basis', 'all species']: self.err_handler.raise_exception("template_type {} ".format(template_type)+"is not" "recognized. Try 'strict', 'all basis', or 'all species'") self.__capture_r_output() r_create_data0 = pkg_resources.resource_string( __name__, 'create_data0.r').decode("utf-8") ro.r(r_create_data0) ro.r.main_create_data0(filename=filename, filename_ss=filename_ss, grid_temps=grid_temps, grid_press=grid_press, db=db, water_model=water_model, template=template, exceed_Ttr=exceed_Ttr, data0_formula_ox_name=data0_formula_ox_name, suppress_redox=suppress_redox, infer_formula_ox=infer_formula_ox, generate_template=generate_template, template_name=template_name, template_type=template_type, verbose=self.verbose) for line in self.stderr: print(line) if self.verbose > 0: print("Finished creating data0.{}.".format(db)) def make_redox_reactions(self, redox_pairs="all"): """ Generate an organized collection of redox reactions for calculating chemical affinity and energy supply values during speciation. Parameters ---------- redox_pairs : list of int or "all", default "all" List of indices of half reactions in the half cell reaction table to be combined when generating full redox reactions. E.g. [0, 1, 4] will combine half reactions with indices 0, 1, and 4 in the table stored in the `half_cell_reactions` attribute of the `AqEquil` class. If "all", generate all possible redox reactions from available half cell reactions. Returns ---------- Output is stored in the `affinity_energy_reactions_raw` and `affinity_energy_reactions_table` attributes of the `AqEquil` class. """ # reset all redox variables stored in the AqEquil class self.affinity_energy_reactions_raw = None self.affinity_energy_reactions_table = None self.affinity_energy_formatted_reactions = None if self.verbose != 0: print("Generating redox reactions...") err_msg = ("redox_pairs can either be 'all' or a list of integers " "indicating the indices of half cell reactions in " "the half_cell_reactions table that should be combined into " "full redox reactions. For example, redox_pairs=[0, 1, 2, 6] " "will combine half cell reactions with indices 0, 1, 2, and 6 in " "the half_cell_reactions table. This table is an attribute in the " "class AqEquil.") if isinstance(redox_pairs, str): if redox_pairs == "all": redox_pairs = list(range(0, self.half_cell_reactions.shape[0])) else: self.err_handler.raise_exception(err_msg) elif isinstance(redox_pairs, list): if not all([isinstance(i, int) for i in redox_pairs]): self.err_handler.raise_exception(err_msg) else: self.err_handler.raise_exception(err_msg) self.redox_pairs = redox_pairs df = self.half_cell_reactions.iloc[redox_pairs].reset_index(drop=True) Oxidant_1 = df['Oxidant_1'] Oxidant_2 = df['Oxidant_2'] Oxidant_3 = df['Oxidant_3'] Reductant_1 = df['Reductant_1'] Reductant_2 = df['Reductant_2'] #CREATING A LIST OF ALL SPECIES AND THEIR ELEMENT DICTIONARIES df_oxidants_and_reductants = self.half_cell_reactions.loc[:, "Oxidant_1":].values.tolist() flat_list = flatten_list(df_oxidants_and_reductants) oxidants_and_reductants = [sp for sp in list(dict.fromkeys(flat_list)) if str(sp) != 'nan'] elements = [list(parse_formula(sp).keys()) for sp in oxidants_and_reductants] elements = flatten_list(elements) elements = list(dict.fromkeys(elements)) # get a dictionary of species and their elemental makeup: # {'HNO3': {'H': 1.0, 'N': 1.0, 'O': 3.0, '-': 0, 'Fe': 0, 'C': 0, 'S': 0, '+': 0}, ...} element_dictionary = dict() for sp in oxidants_and_reductants: element_dictionary[sp] = parse_formula(sp) for e in elements: if element_dictionary[sp].get(e, 0) == 0: element_dictionary[sp][e] = 0 df_reax = pd.DataFrame() # empty df of reactions df_reax['rO_coeff'] = '' df_reax['rO'] = '' df_reax['rR_coeff'] = '' df_reax['rR'] = '' df_reax['pO_coeff'] = '' df_reax['pO'] = '' df_reax['pR_coeff'] = '' df_reax['pR'] = '' df_reax['Reaction'] = '' index = 0 reaction = [] indices = np.arange(0, len(df['Oxidant_1']), 1).tolist()*2 # how to loop back through the redox pairs to not run into out-of-range index rxn_num = 0 # counting unique reactions rxn_list = [] # list of unique reactions rxn_names = [] rxn_pairs = [] # list of paired half reactions for i in range(0, len(df['Oxidant_1']),1): # length of redox pairs - columns for n in range(0, len(df['Oxidant_1']), 1): if Reductant_1[i] == Reductant_1[indices[i+n]] or Reductant_1[i] == Reductant_2[indices[i+n]]: # if both reductants are the same thing, skip continue if Oxidant_1[i] == Oxidant_1[indices[i+n]] or Oxidant_1[i] == Oxidant_2[indices[i+n]] or Oxidant_1[i] == Oxidant_3[indices[i+n]]: continue # if Oxidant_1[i] == 'H2O' and Reductant_1[indices[i+n]] == 'H2O': #suppress the splitting of water # continue else: # GENERATING REACTIONS BETWEEN OXIDANT_1 AND REDUCTANT_1 reaction.append(Oxidant_1[i]+' \t ' + Reductant_1[indices[i+n]] + '=' + Reductant_1[i] + ' \t ' + Oxidant_1[indices[i+n]]) rxn_num+=1 rxn_list.append(rxn_num) rxn_names.append('red_'+Oxidant_1[i]+'_'+Reductant_1[i]+'_ox_'+Reductant_1[indices[i+n]]+'_'+Oxidant_1[indices[i+n]]) df_reax.loc[index, 'rO'] = Oxidant_1[i] df_reax.loc[index, 'rR'] = Reductant_1[indices[i+n]] df_reax.loc[index, 'pR'] = Reductant_1[i] df_reax.loc[index, 'pO'] = Oxidant_1[indices[i+n]] rxn_pairs.append([i, indices[i+n]]) index+=1 # REACTIONS INVOLVING OTHER PH-DEPENDENT SPECIES if pd.isnull(Oxidant_2[i]) != True: reaction.append(Oxidant_2[i] + ' \t ' + Reductant_1[indices[i+n]] + '=' + Reductant_1[i] + ' \t ' + Oxidant_1[indices[i+n]]) rxn_list.append(rxn_num) rxn_names.append('red_'+Oxidant_1[i]+'_'+Reductant_1[i]+'_ox_'+Reductant_1[indices[i+n]]+'_'+Oxidant_1[indices[i+n]]) df_reax.loc[index, 'rO'] = Oxidant_2[i] df_reax.loc[index, 'rR'] = Reductant_1[indices[i+n]] df_reax.loc[index, 'pR'] = Reductant_1[i] df_reax.loc[index, 'pO'] = Oxidant_1[indices[i+n]] rxn_pairs.append([i, indices[i+n]]) index +=1 if pd.isnull(Reductant_2[indices[i+n]]) != True: reaction.append(Oxidant_2[i] + ' \t ' + Reductant_2[indices[i+n]] + '=' + Reductant_1[i] + ' \t ' + Oxidant_1[indices[i+n]]) rxn_list.append(rxn_num) rxn_names.append('red_'+Oxidant_1[i]+'_'+Reductant_1[i]+'_ox_'+Reductant_1[indices[i+n]]+'_'+Oxidant_1[indices[i+n]]) df_reax.loc[index, 'rO'] = Oxidant_2[i] df_reax.loc[index, 'rR'] = Reductant_2[indices[i+n]] df_reax.loc[index, 'pR'] = Reductant_1[i] df_reax.loc[index, 'pO'] = Oxidant_1[indices[i+n]] rxn_pairs.append([i, indices[i+n]]) index +=1 if pd.isnull(Reductant_2[indices[i+n]]) != True: reaction.append(Oxidant_1[i] + ' \t ' + Reductant_2[indices[i+n]] + '=' + Reductant_1[i] + ' \t ' + Oxidant_1[indices[i+n]]) rxn_list.append(rxn_num) rxn_names.append('red_'+Oxidant_1[i]+'_'+Reductant_1[i]+'_ox_'+Reductant_1[indices[i+n]]+'_'+Oxidant_1[indices[i+n]]) df_reax.loc[index, 'rO'] = Oxidant_1[i] df_reax.loc[index, 'rR'] = Reductant_2[indices[i+n]] df_reax.loc[index, 'pR'] = Reductant_1[i] df_reax.loc[index, 'pO'] = Oxidant_1[indices[i+n]] rxn_pairs.append([i, indices[i+n]]) index +=1 if pd.isnull(Oxidant_3[i]) != True: reaction.append(Oxidant_3[i] + ' \t ' + Reductant_1[indices[i+n]] + '=' + Reductant_1[i] + ' \t ' + Oxidant_1[indices[i+n]]) rxn_list.append(rxn_num) rxn_names.append('red_'+Oxidant_1[i]+'_'+Reductant_1[i]+'_ox_'+Reductant_1[indices[i+n]]+'_'+Oxidant_1[indices[i+n]]) df_reax.loc[index, 'rO'] = Oxidant_3[i] df_reax.loc[index, 'rR'] = Reductant_1[indices[i+n]] df_reax.loc[index, 'pR'] = Reductant_1[i] df_reax.loc[index, 'pO'] = Oxidant_1[indices[i+n]] rxn_pairs.append([i, indices[i+n]]) index +=1 if pd.isnull(Reductant_2[indices[i+n]]) != True: reaction.append(Oxidant_3[i] + ' \t ' + Reductant_2[indices[i+n]] + '=' + Reductant_1[i] + ' \t ' + Oxidant_1[indices[i+n]]) rxn_list.append(rxn_num) rxn_names.append('red_'+Oxidant_1[i]+'_'+Reductant_1[i]+'_ox_'+Reductant_1[indices[i+n]]+'_'+Oxidant_1[indices[i+n]]) df_reax.loc[index, 'rO'] = Oxidant_3[i] df_reax.loc[index, 'rR'] = Reductant_2[indices[i+n]] df_reax.loc[index, 'pR'] = Reductant_1[i] df_reax.loc[index, 'pO'] = Oxidant_1[indices[i+n]] rxn_pairs.append([i, indices[i+n]]) index +=1 # rxn_pairs_orig = [[redox_pairs[pair[0]], redox_pairs[pair[1]]] for pair in rxn_pairs] # self.rxn_pairs_main = np.unique(np.array(rxn_pairs_orig), axis=0).tolist() df_reax['Reaction'] = rxn_list df_reax['Names'] = rxn_names df_reax['Temp_Pairs'] = rxn_pairs # if there are no reactions, return nothing if df_reax.shape[0] == 0: incompatible_half_reactions = pd.Series(self.half_cell_reactions["Redox Couple"], index=redox_pairs).tolist() redundant_reductant_or_oxidant = [] for col in ["Oxidant_1", "Oxidant_2", "Oxidant_3", "Reductant_1", "Reductant_2"]: redox_col = pd.Series(self.half_cell_reactions[col], index=[0,1]) if redox_col.eq(redox_col[0]).all(): redundant_reductant_or_oxidant.append(redox_col[0]) err_no_rxns = ("Valid reactions could not be written between the half " "reactions {} ".format(incompatible_half_reactions)+"because " "{}".format(redundant_reductant_or_oxidant)+" is on both sides " "of all reactions.") print(err_no_rxns) return ### BALANCING NON-O, H ELEMENTS for r in range(0, len(df_reax['rO'])): count = 0 #to restart the loop through the elements temp_rO_coeff = [1] *4 #C, N, S, Fe temp_rR_coeff = [1] *4 #C, N, S, Fe temp_pO_coeff = [1] *4 #C, N, S, Fe temp_pR_coeff = [1] *4 #C, N, S, Fe for e in ['C','N','S','Fe']: temp1 = int(element_dictionary[df_reax['rO'][r]][e]) #count for the element in the list for rO at index r temp2 = int(element_dictionary[df_reax['pR'][r]][e]) temp3 = int(element_dictionary[df_reax['rR'][r]][e]) temp4 = int(element_dictionary[df_reax['pO'][r]][e]) if temp1 == temp2: temp_rO_coeff[count] = 1 temp_pR_coeff[count] = 1 if temp1 != temp2: if temp1 ==0: temp_rO_coeff[count] = 1 if temp1 != 0: temp_rO_coeff[count] = np.lcm(temp1,temp2)/temp1 if temp2 == 0: temp_pR_coeff[count] = 1 if temp2 != 0: temp_pR_coeff[count] = np.lcm(temp1,temp2)/temp2 if temp3 == temp4: temp_rR_coeff[count] = 1 temp_pO_coeff[count] = 1 if temp4 != temp3: if temp3 == 0: temp_rR_coeff[count] = 1 if temp3 != 0: temp_rR_coeff[count] = np.lcm(temp3,temp4)/temp3 if temp4 == 0.0: temp_pO_coeff[count] = 1 if temp4 !=0.0: temp_pO_coeff[count] = np.lcm(temp3,temp4)/temp4 count +=1 df_reax.loc[r, 'rO_coeff'] = -max(temp_rO_coeff) df_reax.loc[r, 'rR_coeff'] = -max(temp_rR_coeff) df_reax.loc[r, 'pR_coeff'] = max(temp_pR_coeff) df_reax.loc[r, 'pO_coeff'] = max(temp_pO_coeff) all_reax = df_reax.copy(deep=True) all_reax['rO_2_coeff'] = '' all_reax['rO_2'] = '' all_reax['rO_3_coeff'] = '' all_reax['rO_3'] = '' all_reax['rR_2_coeff'] = '' all_reax['rR_2'] = '' ### MAIN REACTION for r in range(1, max(all_reax['Reaction']+1)): # each reaction number once, 1 to 305 if len(all_reax[all_reax['Reaction']==r].index.values) == 1: # if nothing to combine, skip continue else: temp = all_reax[all_reax['Reaction']==r].index.values[0] #index of first instance of this reaction which has multiple subreactions lst2 = [] all_reax.loc[temp-0.5] = all_reax.loc[temp] # replicating the row to build on all_reax = all_reax.sort_index() # putting the replicated row above the first instance for i in all_reax[all_reax['Reaction']==r].index.values[2:]: #all but the first instance in the reactions (since that's copied already) if all_reax.loc[i, 'rO'] != all_reax.loc[temp, 'rO'] and all_reax.loc[i, 'rO'] not in lst2: # if rO is new (and not the same as the first) lst2.append(all_reax.loc[i, 'rO']) # list unique rO besides the first for l in range(0, len(lst2)): # looping through unique rO temp2 = 'rO_'+str(2+int(l)) # adding 0 or 1 to the rO number all_reax.loc[temp-0.5,str(temp2)] = lst2[l] # add the unique rO to rO_2 or 3 if all_reax.loc[i, 'rR'] != all_reax.loc[temp, 'rR']: # if rR is new all_reax.loc[temp-0.5,'rR_2'] = all_reax.loc[i, 'rR'] # add it to rR_2 ##CHANGING COEFFICIENTS rO = all_reax.loc[temp-0.5,'rO'] #assigning easy variables rO_2 = all_reax.loc[temp-0.5,'rO_2'] rO_3 = all_reax.loc[temp-0.5,'rO_3'] rR = all_reax.loc[temp-0.5,'rR'] rR_2 = all_reax.loc[temp-0.5,'rR_2'] rO_coeff = all_reax.loc[temp-0.5,'rO_coeff'] #these are empty at the moment rO_2_coeff = all_reax.loc[temp-0.5,'rO_2_coeff'] rO_3_coeff = all_reax.loc[temp-0.5,'rO_3_coeff'] rR_2_coeff = all_reax.loc[temp-0.5,'rR_2_coeff'] rR_coeff = all_reax.loc[temp-0.5,'rR_coeff'] if rO_3 != '' and rO_2 != '': #if DIC is the oxidant all_reax.loc[temp-0.5,'rO_coeff'] = rO_coeff/3 #this works fine all_reax.loc[temp-0.5,'rO_2_coeff'] = rO_coeff/3 all_reax.loc[temp-0.5,'rO_3_coeff'] = rO_coeff/3 if rR_2 != '': all_reax.loc[temp-0.5,'rR_coeff'] = rR_coeff/2 #this works fine all_reax.loc[temp-0.5,'rR_2_coeff'] = rR_coeff/2 all_reax.loc[temp-0.4] = all_reax.loc[temp-0.5] #NEW ROW WITH ONLY rR - this works fine : line 4 all_reax.loc[temp-0.4, 'rR_2_coeff'] = 0 all_reax.loc[temp-0.4, 'rR_coeff'] = rR_coeff all_reax.loc[temp-0.3] = all_reax.loc[temp-0.5] #NEW ROW WITH ONLY rR_2 - this works fine: line 5 all_reax.loc[temp-0.3, 'rR_2_coeff'] = rR_coeff all_reax.loc[temp-0.3, 'rR_coeff'] = 0 #NEW ROWS WITH TWO DIC AND BOTH rR all_reax.loc[temp-0.25] = all_reax.loc[temp-0.5] #eliminate CO2 all_reax.loc[temp-0.25, 'rO_coeff'] = 0 all_reax.loc[temp-0.25, 'rO_2_coeff'] = rO_coeff/2 all_reax.loc[temp-0.25, 'rO_3_coeff'] = rO_coeff/2 all_reax.loc[temp-0.24] = all_reax.loc[temp-0.5] #eliminate HCO3- all_reax.loc[temp-0.24, 'rO_2_coeff'] = 0 all_reax.loc[temp-0.24, 'rO_coeff'] = rO_coeff/2 all_reax.loc[temp-0.24, 'rO_3_coeff'] = rO_coeff/2 all_reax.loc[temp-0.23] = all_reax.loc[temp-0.5] #eliminate CO3-2 all_reax.loc[temp-0.23, 'rO_3_coeff'] = 0 all_reax.loc[temp-0.23, 'rO_2_coeff'] = rO_coeff/2 all_reax.loc[temp-0.23, 'rO_coeff'] = rO_coeff/2 all_reax.loc[temp-0.2] = all_reax.loc[temp-0.4] #NEW ROW WITH ONLY rR ELIMINATING CO2: line 6 all_reax.loc[temp-0.2, 'rO_coeff'] = 0 all_reax.loc[temp-0.2, 'rO_2_coeff'] = rO_coeff/2 all_reax.loc[temp-0.2, 'rO_3_coeff'] = rO_coeff/2 all_reax.loc[temp-0.1] = all_reax.loc[temp-0.4] #NEW ROW WITH ONLY rR ELIMINATING HCO3-: line 7 all_reax.loc[temp-0.1, 'rO_coeff'] = rO_coeff/2 all_reax.loc[temp-0.1, 'rO_2_coeff'] = 0 all_reax.loc[temp-0.1, 'rO_3_coeff'] = rO_coeff/2 all_reax.loc[temp-0.05] = all_reax.loc[temp-0.4] #NEW ROW WITH ONLY rR ELIMINATING CO3-2: line 8 all_reax.loc[temp-0.05, 'rO_coeff'] = rO_coeff/2 all_reax.loc[temp-0.05, 'rO_2_coeff'] = rO_coeff/2 all_reax.loc[temp-0.05, 'rO_3_coeff'] = 0 all_reax.loc[temp-0.04] = all_reax.loc[temp-0.3] #NEW ROW WITH ONLY rR_2 ELIMINATING CO2: line 9 all_reax.loc[temp-0.04, 'rO_coeff'] = 0 all_reax.loc[temp-0.04, 'rO_2_coeff'] = rO_coeff/2 all_reax.loc[temp-0.04, 'rO_3_coeff'] = rO_coeff/2 all_reax.loc[temp-0.03] = all_reax.loc[temp-0.3] #NEW ROW WITH ONLY rR_2 ELIMINATING HCO3-: line 10 all_reax.loc[temp-0.03, 'rO_coeff'] = rO_coeff/2 all_reax.loc[temp-0.03, 'rO_2_coeff'] = 0 all_reax.loc[temp-0.03, 'rO_3_coeff'] = rO_coeff/2 all_reax.loc[temp-0.02] = all_reax.loc[temp-0.3] #NEW ROW WITH ONLY rR_2 ELIMINATING CO3-2: line 11 all_reax.loc[temp-0.02, 'rO_coeff'] = rO_coeff/2 all_reax.loc[temp-0.02, 'rO_2_coeff'] = rO_coeff/2 all_reax.loc[temp-0.02, 'rO_3_coeff'] = 0 all_reax.loc[temp-0.01] = all_reax.loc[temp-0.5] #NEW ROW WITH ONLY CO2 and both RR all_reax.loc[temp-0.01, 'rO_coeff'] = rO_coeff all_reax.loc[temp-0.01, 'rO_2_coeff'] = 0 all_reax.loc[temp-0.01, 'rO_3_coeff'] = 0 all_reax.loc[temp-0.009] = all_reax.loc[temp-0.5] #NEW ROW WITH ONLY HCO3- and both RR all_reax.loc[temp-0.009, 'rO_coeff'] = 0 ### all_reax.loc[temp-0.009, 'rO_2_coeff'] = rO_coeff all_reax.loc[temp-0.009, 'rO_3_coeff'] = 0 all_reax.loc[temp-0.008] = all_reax.loc[temp-0.5] #NEW ROW WITH ONLY CO2 and both RR all_reax.loc[temp-0.008, 'rO_coeff'] = 0 all_reax.loc[temp-0.008, 'rO_2_coeff'] = 0 all_reax.loc[temp-0.008, 'rO_3_coeff'] = rO_coeff if rR_2 == '': all_reax.loc[temp-0.2] = all_reax.loc[temp-0.5] #NEW ROW ELIMINATING CO2 all_reax.loc[temp-0.2, 'rO_coeff'] = 0 all_reax.loc[temp-0.2, 'rO_2_coeff'] = rO_coeff/2 all_reax.loc[temp-0.2, 'rO_3_coeff'] = rO_coeff/2 all_reax.loc[temp-0.1] = all_reax.loc[temp-0.5] #NEW ROW ELIMINATING HCO3- all_reax.loc[temp-0.1, 'rO_coeff'] = rO_coeff/2 all_reax.loc[temp-0.1, 'rO_2_coeff'] = 0 all_reax.loc[temp-0.1, 'rO_3_coeff'] = rO_coeff/2 all_reax.loc[temp-0.05] = all_reax.loc[temp-0.5] #NEW ROW ELIMINATING CO3-2 all_reax.loc[temp-0.05, 'rO_coeff'] = rO_coeff/2 all_reax.loc[temp-0.05, 'rO_2_coeff'] = rO_coeff/2 all_reax.loc[temp-0.05, 'rO_3_coeff'] = 0 if rO_2 != '' and rO_3 == '': # IF THERE ARE TWO OXIDANT OPTIONS all_reax.loc[temp-0.5,'rO_coeff'] = rO_coeff/2 all_reax.loc[temp-0.5,'rO_2_coeff'] = rO_coeff/2 if rR_2 != '': #IF THERE ARE TWO REDUCTANT OPTIONS all_reax.loc[temp-0.5,'rR_coeff'] = rR_coeff/2 all_reax.loc[temp-0.5,'rR_2_coeff'] = rR_coeff/2 all_reax.loc[temp-0.4] = all_reax.loc[temp-0.5] #NEW ROW WITH ONLY rR all_reax.loc[temp-0.4, 'rR_2_coeff'] = 0 all_reax.loc[temp-0.4, 'rR_coeff'] = rR_coeff all_reax.loc[temp-0.3] = all_reax.loc[temp-0.5] #NEW ROW WITH ONLY rR_2 all_reax.loc[temp-0.3, 'rR_2_coeff'] = rR_coeff all_reax.loc[temp-0.3, 'rR_coeff'] = 0 all_reax.loc[temp-0.2] = all_reax.loc[temp-0.5] #NEW ROW WITH ONLY rO_2 all_reax.loc[temp-0.2, 'rO_2_coeff'] = rO_coeff all_reax.loc[temp-0.2, 'rO_coeff'] = 0 all_reax.loc[temp-0.1] = all_reax.loc[temp-0.5] #NEW ROW WITH ONLY rO all_reax.loc[temp-0.1, 'rO_2_coeff'] = 0 all_reax.loc[temp-0.1, 'rO_coeff'] = rO_coeff if rO_2 == '' and rO_3 == '' and rR_2 != '': # IF THERE IS ONLY ONE OXIDANT BUT TWO REDUCTANTS all_reax.loc[temp-0.5,'rR_coeff'] = rR_coeff/2 all_reax.loc[temp-0.5,'rR_2_coeff'] = rR_coeff/2 all_reax = all_reax.sort_index().reset_index(drop=True) pair_list = [] for i in range(0, len(all_reax['Temp_Pairs'])): pair_list.append([redox_pairs[all_reax.loc[i, 'Temp_Pairs'][0]], redox_pairs[all_reax.loc[i, 'Temp_Pairs'][1]]]) all_reax['pairs'] = pair_list reax = all_reax.copy(deep=True) reax.drop('Temp_Pairs', axis=1, inplace=True) reax.drop('pairs', axis=1, inplace=True) reax.reset_index(drop=True, inplace=True) for s in ['Fe', 'O', 'H','-','+']: for i in range(0,len(reax['rO'])): ### assigning temporary values to each species temp_rO_coeff = reax['rO_coeff'][i] temp_rO = element_dictionary[reax['rO'][i]][s] if reax['rO_2_coeff'][i] != '': temp_rO_2_coeff = reax['rO_2_coeff'][i] temp_rO_2 = element_dictionary[reax['rO_2'][i]][s] else: temp_rO_2_coeff = 0 temp_rO_2 = 0 if reax['rO_3_coeff'][i] != '': temp_rO_3_coeff = reax['rO_3_coeff'][i] temp_rO_3 = element_dictionary[reax['rO_3'][i]][s] else: temp_rO_3_coeff = 0 temp_rO_3 = 0 temp_rR_coeff = reax['rR_coeff'][i] temp_rR = element_dictionary[reax['rR'][i]][s] if reax['rR_2_coeff'][i] != '': temp_rR_2_coeff = reax['rR_2_coeff'][i] temp_rR_2 = element_dictionary[reax['rR_2'][i]][s] else: temp_rR_2_coeff = 0 temp_rR_2 = 0 temp_pO_coeff = reax['pO_coeff'][i] temp_pO = element_dictionary[reax['pO'][i]][s] temp_pR_coeff = reax['pR_coeff'][i] temp_pR = element_dictionary[reax['pR'][i]][s] r = 0-(temp_rO*temp_rO_coeff+temp_pR*temp_pR_coeff+temp_rO_2*temp_rO_2_coeff+temp_rO_3*temp_rO_3_coeff) o = 0-(temp_rR*temp_rR_coeff+temp_rR_2*temp_rR_2_coeff+temp_pO*temp_pO_coeff) reax.loc[i, 'r_'+s] = r reax.loc[i, 'o_'+s] = o reax['r_H'] = reax['r_H'] - 2*reax['r_O'] reax['o_H'] = reax['o_H'] - 2*reax['o_O'] reax['r_+'] = reax['r_+'] - 2*reax['r_Fe'] - reax['r_H'] reax['o_+'] = reax['o_+'] - 2*reax['o_Fe'] - reax['o_H'] reax['r_e-'] = reax['r_+'] - reax['r_-'] reax['o_e-'] = reax['o_+'] - reax['o_-'] reax.rename({'r_Fe': 'r_Fe+2', 'r_O': 'r_H2O', 'r_H': 'r_H+', 'o_Fe': 'o_Fe+2', 'o_O': 'o_H2O', 'o_H': 'o_H+'}, axis=1, inplace = True) ### MULTIPLYING SUB-REACTIONS lcm_charge = [] electrons = [] for i in range(0, len(reax['rO'])): # # for i in range(0, 10): lcm_charge = np.lcm(round(reax['r_e-'][i]), round(reax['o_e-'][i])) electrons.append(str(lcm_charge)+'e') r_multiplier = abs(lcm_charge/int(reax['r_e-'][i])) o_multiplier = abs(lcm_charge/int(reax['o_e-'][i])) for r in ['rO_coeff', 'pR_coeff', 'r_H2O', 'r_Fe+2', 'r_H+']: reax.loc[i, r] = reax.loc[i, r]*r_multiplier for o in ['rR_coeff', 'pO_coeff', 'o_H2O', 'o_Fe+2', 'o_H+']: reax.loc[i, o] = reax.loc[i, o]*o_multiplier if reax.loc[i, 'rO_2_coeff'] != '': reax.loc[i, 'rO_2_coeff'] = reax.loc[i, 'rO_2_coeff']*r_multiplier if reax.loc[i, 'rO_3_coeff'] != '': reax.loc[i, 'rO_3_coeff'] = reax.loc[i, 'rO_3_coeff']*r_multiplier if reax.loc[i, 'rR_2_coeff'] != '': reax.loc[i, 'rR_2_coeff'] = reax.loc[i, 'rR_2_coeff']*o_multiplier reax['H+'] = reax['r_H+'] + reax['o_H+'] reax['protons'] = 'H+' reax['H2O'] = reax['r_H2O'] + reax['o_H2O'] reax['water'] = 'H2O' reax['Fe+2'] = reax['r_Fe+2'] + reax['o_Fe+2'] reax['iron'] = 'Fe+2' reax.drop(columns = ['r_Fe+2', 'r_H2O', 'r_H+', 'r_-', 'r_+', 'r_e-', 'o_Fe+2', 'o_H2O', 'o_H+', 'o_-', 'o_+', 'o_e-'], axis=1, inplace=True) old_new_dict = {'SO4-2':'SO4-2', 'HSO4-':'HSO4-','FeS2':'pyrite', 'H2S':'H2S', 'H2O':'H2O', 'O2':'O2', 'H2':'H2', 'Fe2O3':'hematite','Fe3O4':'magnetite', 'CO2':'CO2', 'CH4':'METHANE', 'HCO3-':'HCO3-','CO3-2':'CO3-2', 'NH3':'NH3', 'NH4+':'NH4+', 'NO2-':'NO2-', 'HNO2':'HNO2', 'NO3-':'NO3-', 'HNO3':'HNO3', 'S':'sulfur', 'FeOOH':'goethite', 'CO':'CO', 'H+':'H+', 'Fe+2':'Fe+2', 'HS-':'HS-'} old_new_dict = {old:'start'+new+'end' for old,new in zip(old_new_dict.keys(), old_new_dict.values())} real_reax = reax.replace(old_new_dict.keys(), old_new_dict.values()) count = 0 rxn_count = [] rxn_number = [] for i in real_reax['Reaction']: if i not in rxn_count: rxn_count.append(i) rxn_number.append(real_reax['Names'][count]) name_count = 1 else: rxn_number.append(real_reax['Names'][count]+ '_'+str(name_count)+'_sub') name_count += 1 count += 1 real_reax.insert(0, 'Reaction Number', rxn_number) # # real_reax.insert(1, 'electrons', '1e') real_reax.insert(1, 'electrons', electrons) lst3 = [] #list of reaction numbers lst4 = [] #list of reactions with issues for i in range(0, len(real_reax['Reaction'])): if real_reax['Reaction'][i] not in lst3: lst3.append(real_reax['Reaction'][i]) for j in lst3: #looping through reaction numbers first_e = real_reax.loc[real_reax['Reaction'] == j]['electrons'].reset_index(drop=True)[0] for k in real_reax.loc[real_reax['Reaction'] == j]['electrons'].reset_index(drop=True): if k != first_e: if j not in lst4: lst4.append(j) for l in lst4: print(real_reax.loc[real_reax['Reaction'] == l]) real_reax.drop(labels='Reaction', axis=1, inplace = True) real_reax.drop(columns = 'Names', inplace = True) #real_reax.to_csv('template.txt', sep ='\t', header=False, index=False) file = real_reax.to_csv(sep='\t', header=False, index=False, line_terminator='\n') file = file.split("\n") # Formatting species to preserve charges while using regex package new2 = [] for i in old_new_dict.values(): i = i.replace('+','\+') i = i.replace('-','\-') new2.append(i) newlines = [] for line in file: line = line.strip() for s in new2: number = 0 match = re.findall('\W+\d+\.\d+\t'+s, line) if len(match) > 1: for m in match: pos = re.findall('\t(\d+\.\d+)', m) if len(pos) > 0: number += float(pos[0]) neg = re.findall('-\d+\.\d+', m) if len(neg) > 0: number += float(neg[0]) line = line.replace(m,'') line = line + '\t' + str(number) + '\t' + s new_match = re.findall('\t0\.0\t'+s, line) + re.findall('\t0\t'+s, line) + re.findall('\t0\t'+s+'$', line) if len(new_match) > 0: for n in new_match: line = line.replace(n, '') line = re.sub('\t+', '\t', line) line = line.replace('\t0.0\tH\t', '\t') line = line.replace('\tH\t','\tH+\t') line = line.replace('\\', '') line = line.replace('start', '') line = line.replace('end', '') line = line.strip() newlines.append(line) self.affinity_energy_reactions_raw = "\n".join(newlines) df_rxn = pd.DataFrame([x.split('\t') for x in self.affinity_energy_reactions_raw.split('\n')]) df_rxn.columns = df_rxn.columns.map(str) df_rxn = df_rxn.rename(columns={"0": "reaction_name", "1": "mol_e-_transferred_per_mol_rxn"}) df_rxn.insert(1, 'redox_pairs', all_reax['pairs']) df_rxn = df_rxn.set_index("reaction_name") df_rxn = df_rxn[df_rxn['mol_e-_transferred_per_mol_rxn'].notna()] self.affinity_energy_reactions_table = df_rxn # rxn_pairs_all = [] # n = 0 # prev_was_sub = False # for i, rxn in enumerate(self.affinity_energy_reactions_table.index.tolist()): # if "_sub" not in rxn[-4:]: # if prev_was_sub: # n += 1 # prev_was_sub = False # rxn_pairs_all.append(self.rxn_pairs_main[n]) # else: # rxn_pairs_all.append(self.rxn_pairs_main[n]) # prev_was_sub = True # self.rxn_pairs_all = rxn_pairs_all # self.affinity_energy_reactions_table.insert(0, "redox_pairs", rxn_pairs_all) prev_was_coeff = False n = 1 for col in self.affinity_energy_reactions_table.iloc[:, 2:].columns: if not prev_was_coeff: new_col_name = "coeff_"+str(n) prev_was_coeff = True else: new_col_name = "species_"+str(n) prev_was_coeff = False n += 1 self.affinity_energy_reactions_table = self.affinity_energy_reactions_table.rename(columns={col: new_col_name}) nonsub_reaction_names = [name for name in self.affinity_energy_reactions_table.index if "_sub" not in name[-4:]] if self.verbose != 0: print("{} redox reactions have been generated.".format(len(nonsub_reaction_names))) def show_redox_reactions(self, formatted=True, charge_sign_at_end=False, hide_subreactions=True, simplify=True, show=True): """ Show a table of redox reactions generated with the function `make_redox_reactions`. Parameters ---------- formatted : bool, default True Should reactions be formatted for html output? charge_sign_at_end : bool, default False Display charge with sign after the number (e.g. SO4 2-)? Ignored if `formatted` is False. hide_subreactions : bool, default True Hide subreactions? show : bool, default False Show the table of reactions? Ignored if not run in a Jupyter notebook. Returns ---------- A pandas dataframe containing balanced redox reactions written in full. """ self.affinity_energy_formatted_reactions = copy.copy(self.affinity_energy_reactions_table.iloc[:, 0:1]) df = copy.copy(self.affinity_energy_reactions_table) if simplify: main_rxn_names = df.loc[[ind for ind in df.index if "_sub" not in ind[-4:]]].index df = df.iloc[[i-1 for i in range(0, len(df.index)) if "_sub" not in df.index[i][-4:]]] self.affinity_energy_formatted_reactions = copy.copy(df.iloc[:, 0:1]) reactions = [] for irow in range(0, df.shape[0]): redox_pair = df.loc[df.index[irow], "redox_pairs"] oxidant_1 = self.half_cell_reactions.loc[self.half_cell_reactions.index[redox_pair[0]], "Oxidant_1"] oxidant_2 = self.half_cell_reactions.loc[self.half_cell_reactions.index[redox_pair[0]], "Oxidant_2"] oxidant_3 = self.half_cell_reactions.loc[self.half_cell_reactions.index[redox_pair[0]], "Oxidant_3"] reductant_1 = self.half_cell_reactions.loc[self.half_cell_reactions.index[redox_pair[1]], "Reductant_1"] reductant_2 = self.half_cell_reactions.loc[self.half_cell_reactions.index[redox_pair[1]], "Reductant_2"] oxidants = [ox for ox in [oxidant_1, oxidant_2, oxidant_3] if str(ox) != 'nan'] reductants = [rd for rd in [reductant_1, reductant_2] if str(rd) != 'nan'] if len(oxidants) > 1: oxidant_sigma_needed = True else: oxidant_sigma_needed = False if len(reductants) > 1: reductant_sigma_needed = True else: reductant_sigma_needed = False rxn_row = df.iloc[irow, 2:] rxn = rxn_row[rxn_row.notna()] coeffs = copy.copy(rxn[::2]).tolist() names = copy.copy(rxn[1::2]).tolist() # print(rxn.name) if oxidant_sigma_needed or reductant_sigma_needed: reactant_names = [names[i] for i in range(0, len(names)) if float(coeffs[i]) < 0] for sp in reactant_names: if sp in oxidants and oxidant_sigma_needed: i = names.index(sp) names[i] = u"\u03A3"+sp if sp in reductants and reductant_sigma_needed: i = names.index(sp) names[i] = u"\u03A3"+sp react_grid = pd.DataFrame({"coeff":coeffs, "name":names}) react_grid["coeff"] = pd.to_numeric(react_grid["coeff"]) react_grid = react_grid.astype({'coeff': 'float'}) reactants = " + ".join([(str(-int(react_grid["coeff"][i]) if react_grid["coeff"][i].is_integer() else -react_grid["coeff"][i])+" " if -react_grid["coeff"][i] != 1 else "") + react_grid["name"][i] for i in range(0, len(react_grid["name"])) if react_grid["coeff"][i] < 0]) products = " + ".join([(str(int(react_grid["coeff"][i]) if react_grid["coeff"][i].is_integer() else react_grid["coeff"][i])+" " if react_grid["coeff"][i] != 1 else "") + react_grid["name"][i] for i in range(0, len(react_grid["name"])) if react_grid["coeff"][i] > 0]) if formatted: reactants = " + ".join([format_coeff(react_grid["coeff"][i]) + html_chemname_format_AqEquil(react_grid["name"][i], charge_sign_at_end=charge_sign_at_end) for i in range(0, len(react_grid["name"])) if react_grid["coeff"][i] < 0]) products = " + ".join([format_coeff(react_grid["coeff"][i]) + html_chemname_format_AqEquil(react_grid["name"][i], charge_sign_at_end=charge_sign_at_end) for i in range(0, len(react_grid["name"])) if react_grid["coeff"][i] > 0]) reaction = reactants + " = " + products reactions.append(reaction) self.affinity_energy_formatted_reactions["reaction"] = reactions[1:] + reactions[:1] # because reactions got rotated with respect to reaction names, rotate the other way self.affinity_energy_formatted_reactions.index = main_rxn_names else: reactions = [] for irow in range(0, df.shape[0]): redox_pair = df.loc[self.affinity_energy_reactions_table.index[irow], "redox_pairs"] oxidant = redox_pair[0] reductant = redox_pair[1] rxn_row = df.iloc[irow, 2:] rxn = rxn_row[rxn_row.notna()] coeffs = copy.copy(rxn[::2]).tolist() names = copy.copy(rxn[1::2]).tolist() react_grid = pd.DataFrame({"coeff":coeffs, "name":names}) react_grid["coeff"] = pd.to_numeric(react_grid["coeff"]) react_grid = react_grid.astype({'coeff': 'float'}) reactants = " + ".join([(str(-int(react_grid["coeff"][i]) if react_grid["coeff"][i].is_integer() else -react_grid["coeff"][i])+" " if -react_grid["coeff"][i] != 1 else "") + react_grid["name"][i] for i in range(0, len(react_grid["name"])) if react_grid["coeff"][i] < 0]) products = " + ".join([(str(int(react_grid["coeff"][i]) if react_grid["coeff"][i].is_integer() else react_grid["coeff"][i])+" " if react_grid["coeff"][i] != 1 else "") + react_grid["name"][i] for i in range(0, len(react_grid["name"])) if react_grid["coeff"][i] > 0]) if formatted: reactants = " + ".join([format_coeff(react_grid["coeff"][i]) + html_chemname_format_AqEquil(react_grid["name"][i], charge_sign_at_end=charge_sign_at_end) for i in range(0, len(react_grid["name"])) if react_grid["coeff"][i] < 0]) products = " + ".join([format_coeff(react_grid["coeff"][i]) + html_chemname_format_AqEquil(react_grid["name"][i], charge_sign_at_end=charge_sign_at_end) for i in range(0, len(react_grid["name"])) if react_grid["coeff"][i] > 0]) reaction = reactants + " = " + products reactions.append(reaction) self.affinity_energy_formatted_reactions["reaction"] = reactions df_out = copy.copy(self.affinity_energy_formatted_reactions) if hide_subreactions and not simplify: df_out = self.affinity_energy_formatted_reactions.loc[[ind for ind in self.affinity_energy_formatted_reactions.index if "_sub" not in ind[-4:]]] if isnotebook() and show: display(HTML(df_out.to_html(escape=False))) return df_outMethods
def create_data0(self, db, filename, filename_ss=None, data0_formula_ox_name=None, suppress_redox=[], water_model='SUPCRT92', exceed_Ttr=True, grid_temps=[0.01, 50.0, 100.0, 150.0, 200.0, 250.0, 300.0, 350.0], grid_press='Psat', infer_formula_ox=False, generate_template=True, template_name=None, template_type='strict', verbose=1)-
Create a data0 file from a custom thermodynamic dataset.
Parameters
db:str- Desired three letter code of data0 output.
filename:str- Name of csv file containing thermodynamic data in the OBIGT format.
filename_ss:str, optional- Name of file containing solid solution parameters.
data0_formula_ox_name:str, optional- Name of supplementary file containing data0 parameters and inferred
formula oxidation states. Ignored if
infer_formula_oxis False. Seeinfer_formula_oxfor more detail. suppress_redox:listofstr, default[]- Suppress equilibrium between oxidation states of listed elements (Cl, H, and O cannot be included).
water_model:str, default"SUPCRT92"- This is an experimental feature that is not yet fully supported. Desired water model. Can be either "SUPCRT92", "IAPWS95", or "DEW". These models are described here: http://chnosz.net/manual/water.html
exceed_Ttr:bool, defaultTrue- Calculate Gibbs energies of mineral phases and other species beyond their transition temperatures?
grid_temps:listofeight float, default[0.0100, 50.0000, 100.0000, 150.0000, 200.0000, 250.0000, 300.0000, 350.0000]- Eight temperature values that make up the T-P grid.
grid_press:listoffloat, default"Psat"- Eight pressure values that make up the T-P grid. "Psat" for calculations along the liquid-vapor saturation curve.
infer_formula_ox:bool, defaultFalse- Create a supplementary file containing data0 parameters and inferred formula oxidation states? This option is useful for creating as many entries in the formula_ox column when creating a new supplementary file. Note that compounds like DySO4+ result in blank entries in formula_ox because the redox states of two elements, Dy and S, would have to be estimated together; S has many oxidation states and Dy's oxidation states are not hard-coded.
generate_template:bool, defaultTrue- Generate a CSV sample input template customized to this data0? Columns include 'Sample', 'Temperature', 'logfO2', and all strict basis species.
template_name:str, optional- Name of the sample input template file generated. If no name is
supplied, defaults to 'sample_template_xyz.csv', where 'xyz' is
the three letter code given to
db. Ignored ifgenerate_templateis False. template_type:str, either 'strict', 'all basis',or'all species'- Determines which columns are written to the sample template.
- 'strict' includes strict basis species
- 'all basis' includes strict and auxiliary basis species
- 'all species' includes all species in the thermodynamic database
Ignored if
generate_templateis False. verbose:int, 0, 1,or2, default1- Level determining how many messages are returned during a calculation. 2 for all messages, 1 for errors or warnings only, 0 for silent.
Expand source code
def create_data0(self, db, filename, filename_ss=None, data0_formula_ox_name=None, suppress_redox=[], water_model="SUPCRT92", exceed_Ttr=True, grid_temps=[0.0100, 50.0000, 100.0000, 150.0000, 200.0000, 250.0000, 300.0000, 350.0000], grid_press="Psat", infer_formula_ox=False, generate_template=True, template_name=None, template_type="strict", verbose=1): """ Create a data0 file from a custom thermodynamic dataset. Parameters ---------- db : str Desired three letter code of data0 output. filename : str Name of csv file containing thermodynamic data in the OBIGT format. filename_ss : str, optional Name of file containing solid solution parameters. data0_formula_ox_name : str, optional Name of supplementary file containing data0 parameters and inferred formula oxidation states. Ignored if `infer_formula_ox` is False. See `infer_formula_ox` for more detail. suppress_redox : list of str, default [] Suppress equilibrium between oxidation states of listed elements (Cl, H, and O cannot be included). water_model : str, default "SUPCRT92" This is an experimental feature that is not yet fully supported. Desired water model. Can be either "SUPCRT92", "IAPWS95", or "DEW". These models are described here: http://chnosz.net/manual/water.html exceed_Ttr : bool, default True Calculate Gibbs energies of mineral phases and other species beyond their transition temperatures? grid_temps : list of eight float, default [0.0100, 50.0000, 100.0000, 150.0000, 200.0000, 250.0000, 300.0000, 350.0000] Eight temperature values that make up the T-P grid. grid_press : list of float, default "Psat" Eight pressure values that make up the T-P grid. "Psat" for calculations along the liquid-vapor saturation curve. infer_formula_ox : bool, default False Create a supplementary file containing data0 parameters and inferred formula oxidation states? This option is useful for creating as many entries in the formula_ox column when creating a new supplementary file. Note that compounds like DySO4+ result in blank entries in formula_ox because the redox states of two elements, Dy and S, would have to be estimated together; S has many oxidation states and Dy's oxidation states are not hard-coded. generate_template : bool, default True Generate a CSV sample input template customized to this data0? Columns include 'Sample', 'Temperature', 'logfO2', and all strict basis species. template_name : str, optional Name of the sample input template file generated. If no name is supplied, defaults to 'sample_template_xyz.csv', where 'xyz' is the three letter code given to `db`. Ignored if `generate_template` is False. template_type : str, either 'strict', 'all basis', or 'all species' Determines which columns are written to the sample template. - 'strict' includes strict basis species - 'all basis' includes strict and auxiliary basis species - 'all species' includes all species in the thermodynamic database Ignored if `generate_template` is False. verbose : int, 0, 1, or 2, default 1 Level determining how many messages are returned during a calculation. 2 for all messages, 1 for errors or warnings only, 0 for silent. """ # Check that thermodynamic database input files exist and are formatted # correctly. self._check_database_file(filename) if filename_ss != None: self.__file_exists(filename_ss) self.verbose = verbose if self.verbose >= 1: print("Creating data0.{}...".format(db), flush=True) if len(grid_temps) > 8 or len(grid_temps) < 1: self.err_handler.raise_exception("'grid_temps' must have eight values.") if isinstance(grid_press, list): if len(grid_press) > 8 or len(grid_press) < 1: self.err_handler.raise_exception("'grid_press' must have eight values.") if sum([T >= 10000 for T in grid_temps]): self.err_handler.raise_exception("Grid temperatures must be below 10000 °C.") if isinstance(grid_press, list): if sum([P >= 10000 for P in grid_press]): self.err_handler.raise_exception("Grid pressures must be below 10000 bars.") if water_model == "SUPCRT92": min_T = 0 max_T = 2250 min_P = 1 max_P = 30000 elif water_model == "IAPWS95": min_T = 0 max_T = 1000 min_P = 1 max_P = 10000 elif water_model == "DEW": min_T = 0 max_T = 1000 min_P = 1 max_P = 60000 else: self.err_handler.raise_exception("The water model '{}' ".format(water_model)+"is not " "recognized. Try 'SUPCRT92', 'IAPWS95', or 'DEW'.") # check that T and P are above minimum values if sum([T <= min_T for T in grid_temps]): print("WARNING: one or more temperatures in 'grid_temps' is below " "or equal to {} °C".format(min_T)+" and is outside the valid " "temperature range for the {} water model.".format(water_model)) if isinstance(grid_press, list): if sum([P < min_P for P in grid_press]): print("WARNING: one or more pressures in 'grid_press' is below " "{} bar".format(min_P)+", the minimum valid " "pressure for the {} water model.".format(water_model)) # check that T and P are below maximum values if sum([T > max_T for T in grid_temps]): print("WARNING: one or more temperatures in 'grid_temps' is above " "{} °C".format(max_T)+", the maximum valid " "temperature for the {} water model.".format(water_model)) if isinstance(grid_press, list): if sum([P > max_P for P in grid_press]): print("WARNING: one or more pressures in 'grid_press' is above " "{} bar".format(max_P)+", the maximum valid " "pressure for the {} water model.".format(water_model)) if water_model != "SUPCRT92": print("WARNING: water models other than SUPCRT92 are not yet fully supported.") template = pkg_resources.resource_string( __name__, 'data0.min').decode("utf-8") grid_temps = convert_to_RVector(grid_temps) grid_press = convert_to_RVector(grid_press) suppress_redox = convert_to_RVector(suppress_redox) if filename_ss == None: filename_ss = ro.r("NULL") if data0_formula_ox_name == None: data0_formula_ox_name = ro.r("NULL") if template_name == None: template_name = "sample_template_{}.csv".format(db) if template_type not in ['strict', 'all basis', 'all species']: self.err_handler.raise_exception("template_type {} ".format(template_type)+"is not" "recognized. Try 'strict', 'all basis', or 'all species'") self.__capture_r_output() r_create_data0 = pkg_resources.resource_string( __name__, 'create_data0.r').decode("utf-8") ro.r(r_create_data0) ro.r.main_create_data0(filename=filename, filename_ss=filename_ss, grid_temps=grid_temps, grid_press=grid_press, db=db, water_model=water_model, template=template, exceed_Ttr=exceed_Ttr, data0_formula_ox_name=data0_formula_ox_name, suppress_redox=suppress_redox, infer_formula_ox=infer_formula_ox, generate_template=generate_template, template_name=template_name, template_type=template_type, verbose=self.verbose) for line in self.stderr: print(line) if self.verbose > 0: print("Finished creating data0.{}.".format(db)) def make_redox_reactions(self, redox_pairs='all')-
Generate an organized collection of redox reactions for calculating chemical affinity and energy supply values during speciation.
Parameters
redox_pairs:listofintor"all", default"all"- List of indices of half reactions in the half cell reaction table
to be combined when generating full redox reactions.
E.g. [0, 1, 4] will combine half reactions with indices 0, 1, and 4
in the table stored in the
half_cell_reactionsattribute of theAqEquilclass. If "all", generate all possible redox reactions from available half cell reactions.
Returns
Output is stored in the
affinity_energy_reactions_rawandaffinity_energy_reactions_tableattributes of theAqEquilclass.Expand source code
def make_redox_reactions(self, redox_pairs="all"): """ Generate an organized collection of redox reactions for calculating chemical affinity and energy supply values during speciation. Parameters ---------- redox_pairs : list of int or "all", default "all" List of indices of half reactions in the half cell reaction table to be combined when generating full redox reactions. E.g. [0, 1, 4] will combine half reactions with indices 0, 1, and 4 in the table stored in the `half_cell_reactions` attribute of the `AqEquil` class. If "all", generate all possible redox reactions from available half cell reactions. Returns ---------- Output is stored in the `affinity_energy_reactions_raw` and `affinity_energy_reactions_table` attributes of the `AqEquil` class. """ # reset all redox variables stored in the AqEquil class self.affinity_energy_reactions_raw = None self.affinity_energy_reactions_table = None self.affinity_energy_formatted_reactions = None if self.verbose != 0: print("Generating redox reactions...") err_msg = ("redox_pairs can either be 'all' or a list of integers " "indicating the indices of half cell reactions in " "the half_cell_reactions table that should be combined into " "full redox reactions. For example, redox_pairs=[0, 1, 2, 6] " "will combine half cell reactions with indices 0, 1, 2, and 6 in " "the half_cell_reactions table. This table is an attribute in the " "class AqEquil.") if isinstance(redox_pairs, str): if redox_pairs == "all": redox_pairs = list(range(0, self.half_cell_reactions.shape[0])) else: self.err_handler.raise_exception(err_msg) elif isinstance(redox_pairs, list): if not all([isinstance(i, int) for i in redox_pairs]): self.err_handler.raise_exception(err_msg) else: self.err_handler.raise_exception(err_msg) self.redox_pairs = redox_pairs df = self.half_cell_reactions.iloc[redox_pairs].reset_index(drop=True) Oxidant_1 = df['Oxidant_1'] Oxidant_2 = df['Oxidant_2'] Oxidant_3 = df['Oxidant_3'] Reductant_1 = df['Reductant_1'] Reductant_2 = df['Reductant_2'] #CREATING A LIST OF ALL SPECIES AND THEIR ELEMENT DICTIONARIES df_oxidants_and_reductants = self.half_cell_reactions.loc[:, "Oxidant_1":].values.tolist() flat_list = flatten_list(df_oxidants_and_reductants) oxidants_and_reductants = [sp for sp in list(dict.fromkeys(flat_list)) if str(sp) != 'nan'] elements = [list(parse_formula(sp).keys()) for sp in oxidants_and_reductants] elements = flatten_list(elements) elements = list(dict.fromkeys(elements)) # get a dictionary of species and their elemental makeup: # {'HNO3': {'H': 1.0, 'N': 1.0, 'O': 3.0, '-': 0, 'Fe': 0, 'C': 0, 'S': 0, '+': 0}, ...} element_dictionary = dict() for sp in oxidants_and_reductants: element_dictionary[sp] = parse_formula(sp) for e in elements: if element_dictionary[sp].get(e, 0) == 0: element_dictionary[sp][e] = 0 df_reax = pd.DataFrame() # empty df of reactions df_reax['rO_coeff'] = '' df_reax['rO'] = '' df_reax['rR_coeff'] = '' df_reax['rR'] = '' df_reax['pO_coeff'] = '' df_reax['pO'] = '' df_reax['pR_coeff'] = '' df_reax['pR'] = '' df_reax['Reaction'] = '' index = 0 reaction = [] indices = np.arange(0, len(df['Oxidant_1']), 1).tolist()*2 # how to loop back through the redox pairs to not run into out-of-range index rxn_num = 0 # counting unique reactions rxn_list = [] # list of unique reactions rxn_names = [] rxn_pairs = [] # list of paired half reactions for i in range(0, len(df['Oxidant_1']),1): # length of redox pairs - columns for n in range(0, len(df['Oxidant_1']), 1): if Reductant_1[i] == Reductant_1[indices[i+n]] or Reductant_1[i] == Reductant_2[indices[i+n]]: # if both reductants are the same thing, skip continue if Oxidant_1[i] == Oxidant_1[indices[i+n]] or Oxidant_1[i] == Oxidant_2[indices[i+n]] or Oxidant_1[i] == Oxidant_3[indices[i+n]]: continue # if Oxidant_1[i] == 'H2O' and Reductant_1[indices[i+n]] == 'H2O': #suppress the splitting of water # continue else: # GENERATING REACTIONS BETWEEN OXIDANT_1 AND REDUCTANT_1 reaction.append(Oxidant_1[i]+' \t ' + Reductant_1[indices[i+n]] + '=' + Reductant_1[i] + ' \t ' + Oxidant_1[indices[i+n]]) rxn_num+=1 rxn_list.append(rxn_num) rxn_names.append('red_'+Oxidant_1[i]+'_'+Reductant_1[i]+'_ox_'+Reductant_1[indices[i+n]]+'_'+Oxidant_1[indices[i+n]]) df_reax.loc[index, 'rO'] = Oxidant_1[i] df_reax.loc[index, 'rR'] = Reductant_1[indices[i+n]] df_reax.loc[index, 'pR'] = Reductant_1[i] df_reax.loc[index, 'pO'] = Oxidant_1[indices[i+n]] rxn_pairs.append([i, indices[i+n]]) index+=1 # REACTIONS INVOLVING OTHER PH-DEPENDENT SPECIES if pd.isnull(Oxidant_2[i]) != True: reaction.append(Oxidant_2[i] + ' \t ' + Reductant_1[indices[i+n]] + '=' + Reductant_1[i] + ' \t ' + Oxidant_1[indices[i+n]]) rxn_list.append(rxn_num) rxn_names.append('red_'+Oxidant_1[i]+'_'+Reductant_1[i]+'_ox_'+Reductant_1[indices[i+n]]+'_'+Oxidant_1[indices[i+n]]) df_reax.loc[index, 'rO'] = Oxidant_2[i] df_reax.loc[index, 'rR'] = Reductant_1[indices[i+n]] df_reax.loc[index, 'pR'] = Reductant_1[i] df_reax.loc[index, 'pO'] = Oxidant_1[indices[i+n]] rxn_pairs.append([i, indices[i+n]]) index +=1 if pd.isnull(Reductant_2[indices[i+n]]) != True: reaction.append(Oxidant_2[i] + ' \t ' + Reductant_2[indices[i+n]] + '=' + Reductant_1[i] + ' \t ' + Oxidant_1[indices[i+n]]) rxn_list.append(rxn_num) rxn_names.append('red_'+Oxidant_1[i]+'_'+Reductant_1[i]+'_ox_'+Reductant_1[indices[i+n]]+'_'+Oxidant_1[indices[i+n]]) df_reax.loc[index, 'rO'] = Oxidant_2[i] df_reax.loc[index, 'rR'] = Reductant_2[indices[i+n]] df_reax.loc[index, 'pR'] = Reductant_1[i] df_reax.loc[index, 'pO'] = Oxidant_1[indices[i+n]] rxn_pairs.append([i, indices[i+n]]) index +=1 if pd.isnull(Reductant_2[indices[i+n]]) != True: reaction.append(Oxidant_1[i] + ' \t ' + Reductant_2[indices[i+n]] + '=' + Reductant_1[i] + ' \t ' + Oxidant_1[indices[i+n]]) rxn_list.append(rxn_num) rxn_names.append('red_'+Oxidant_1[i]+'_'+Reductant_1[i]+'_ox_'+Reductant_1[indices[i+n]]+'_'+Oxidant_1[indices[i+n]]) df_reax.loc[index, 'rO'] = Oxidant_1[i] df_reax.loc[index, 'rR'] = Reductant_2[indices[i+n]] df_reax.loc[index, 'pR'] = Reductant_1[i] df_reax.loc[index, 'pO'] = Oxidant_1[indices[i+n]] rxn_pairs.append([i, indices[i+n]]) index +=1 if pd.isnull(Oxidant_3[i]) != True: reaction.append(Oxidant_3[i] + ' \t ' + Reductant_1[indices[i+n]] + '=' + Reductant_1[i] + ' \t ' + Oxidant_1[indices[i+n]]) rxn_list.append(rxn_num) rxn_names.append('red_'+Oxidant_1[i]+'_'+Reductant_1[i]+'_ox_'+Reductant_1[indices[i+n]]+'_'+Oxidant_1[indices[i+n]]) df_reax.loc[index, 'rO'] = Oxidant_3[i] df_reax.loc[index, 'rR'] = Reductant_1[indices[i+n]] df_reax.loc[index, 'pR'] = Reductant_1[i] df_reax.loc[index, 'pO'] = Oxidant_1[indices[i+n]] rxn_pairs.append([i, indices[i+n]]) index +=1 if pd.isnull(Reductant_2[indices[i+n]]) != True: reaction.append(Oxidant_3[i] + ' \t ' + Reductant_2[indices[i+n]] + '=' + Reductant_1[i] + ' \t ' + Oxidant_1[indices[i+n]]) rxn_list.append(rxn_num) rxn_names.append('red_'+Oxidant_1[i]+'_'+Reductant_1[i]+'_ox_'+Reductant_1[indices[i+n]]+'_'+Oxidant_1[indices[i+n]]) df_reax.loc[index, 'rO'] = Oxidant_3[i] df_reax.loc[index, 'rR'] = Reductant_2[indices[i+n]] df_reax.loc[index, 'pR'] = Reductant_1[i] df_reax.loc[index, 'pO'] = Oxidant_1[indices[i+n]] rxn_pairs.append([i, indices[i+n]]) index +=1 # rxn_pairs_orig = [[redox_pairs[pair[0]], redox_pairs[pair[1]]] for pair in rxn_pairs] # self.rxn_pairs_main = np.unique(np.array(rxn_pairs_orig), axis=0).tolist() df_reax['Reaction'] = rxn_list df_reax['Names'] = rxn_names df_reax['Temp_Pairs'] = rxn_pairs # if there are no reactions, return nothing if df_reax.shape[0] == 0: incompatible_half_reactions = pd.Series(self.half_cell_reactions["Redox Couple"], index=redox_pairs).tolist() redundant_reductant_or_oxidant = [] for col in ["Oxidant_1", "Oxidant_2", "Oxidant_3", "Reductant_1", "Reductant_2"]: redox_col = pd.Series(self.half_cell_reactions[col], index=[0,1]) if redox_col.eq(redox_col[0]).all(): redundant_reductant_or_oxidant.append(redox_col[0]) err_no_rxns = ("Valid reactions could not be written between the half " "reactions {} ".format(incompatible_half_reactions)+"because " "{}".format(redundant_reductant_or_oxidant)+" is on both sides " "of all reactions.") print(err_no_rxns) return ### BALANCING NON-O, H ELEMENTS for r in range(0, len(df_reax['rO'])): count = 0 #to restart the loop through the elements temp_rO_coeff = [1] *4 #C, N, S, Fe temp_rR_coeff = [1] *4 #C, N, S, Fe temp_pO_coeff = [1] *4 #C, N, S, Fe temp_pR_coeff = [1] *4 #C, N, S, Fe for e in ['C','N','S','Fe']: temp1 = int(element_dictionary[df_reax['rO'][r]][e]) #count for the element in the list for rO at index r temp2 = int(element_dictionary[df_reax['pR'][r]][e]) temp3 = int(element_dictionary[df_reax['rR'][r]][e]) temp4 = int(element_dictionary[df_reax['pO'][r]][e]) if temp1 == temp2: temp_rO_coeff[count] = 1 temp_pR_coeff[count] = 1 if temp1 != temp2: if temp1 ==0: temp_rO_coeff[count] = 1 if temp1 != 0: temp_rO_coeff[count] = np.lcm(temp1,temp2)/temp1 if temp2 == 0: temp_pR_coeff[count] = 1 if temp2 != 0: temp_pR_coeff[count] = np.lcm(temp1,temp2)/temp2 if temp3 == temp4: temp_rR_coeff[count] = 1 temp_pO_coeff[count] = 1 if temp4 != temp3: if temp3 == 0: temp_rR_coeff[count] = 1 if temp3 != 0: temp_rR_coeff[count] = np.lcm(temp3,temp4)/temp3 if temp4 == 0.0: temp_pO_coeff[count] = 1 if temp4 !=0.0: temp_pO_coeff[count] = np.lcm(temp3,temp4)/temp4 count +=1 df_reax.loc[r, 'rO_coeff'] = -max(temp_rO_coeff) df_reax.loc[r, 'rR_coeff'] = -max(temp_rR_coeff) df_reax.loc[r, 'pR_coeff'] = max(temp_pR_coeff) df_reax.loc[r, 'pO_coeff'] = max(temp_pO_coeff) all_reax = df_reax.copy(deep=True) all_reax['rO_2_coeff'] = '' all_reax['rO_2'] = '' all_reax['rO_3_coeff'] = '' all_reax['rO_3'] = '' all_reax['rR_2_coeff'] = '' all_reax['rR_2'] = '' ### MAIN REACTION for r in range(1, max(all_reax['Reaction']+1)): # each reaction number once, 1 to 305 if len(all_reax[all_reax['Reaction']==r].index.values) == 1: # if nothing to combine, skip continue else: temp = all_reax[all_reax['Reaction']==r].index.values[0] #index of first instance of this reaction which has multiple subreactions lst2 = [] all_reax.loc[temp-0.5] = all_reax.loc[temp] # replicating the row to build on all_reax = all_reax.sort_index() # putting the replicated row above the first instance for i in all_reax[all_reax['Reaction']==r].index.values[2:]: #all but the first instance in the reactions (since that's copied already) if all_reax.loc[i, 'rO'] != all_reax.loc[temp, 'rO'] and all_reax.loc[i, 'rO'] not in lst2: # if rO is new (and not the same as the first) lst2.append(all_reax.loc[i, 'rO']) # list unique rO besides the first for l in range(0, len(lst2)): # looping through unique rO temp2 = 'rO_'+str(2+int(l)) # adding 0 or 1 to the rO number all_reax.loc[temp-0.5,str(temp2)] = lst2[l] # add the unique rO to rO_2 or 3 if all_reax.loc[i, 'rR'] != all_reax.loc[temp, 'rR']: # if rR is new all_reax.loc[temp-0.5,'rR_2'] = all_reax.loc[i, 'rR'] # add it to rR_2 ##CHANGING COEFFICIENTS rO = all_reax.loc[temp-0.5,'rO'] #assigning easy variables rO_2 = all_reax.loc[temp-0.5,'rO_2'] rO_3 = all_reax.loc[temp-0.5,'rO_3'] rR = all_reax.loc[temp-0.5,'rR'] rR_2 = all_reax.loc[temp-0.5,'rR_2'] rO_coeff = all_reax.loc[temp-0.5,'rO_coeff'] #these are empty at the moment rO_2_coeff = all_reax.loc[temp-0.5,'rO_2_coeff'] rO_3_coeff = all_reax.loc[temp-0.5,'rO_3_coeff'] rR_2_coeff = all_reax.loc[temp-0.5,'rR_2_coeff'] rR_coeff = all_reax.loc[temp-0.5,'rR_coeff'] if rO_3 != '' and rO_2 != '': #if DIC is the oxidant all_reax.loc[temp-0.5,'rO_coeff'] = rO_coeff/3 #this works fine all_reax.loc[temp-0.5,'rO_2_coeff'] = rO_coeff/3 all_reax.loc[temp-0.5,'rO_3_coeff'] = rO_coeff/3 if rR_2 != '': all_reax.loc[temp-0.5,'rR_coeff'] = rR_coeff/2 #this works fine all_reax.loc[temp-0.5,'rR_2_coeff'] = rR_coeff/2 all_reax.loc[temp-0.4] = all_reax.loc[temp-0.5] #NEW ROW WITH ONLY rR - this works fine : line 4 all_reax.loc[temp-0.4, 'rR_2_coeff'] = 0 all_reax.loc[temp-0.4, 'rR_coeff'] = rR_coeff all_reax.loc[temp-0.3] = all_reax.loc[temp-0.5] #NEW ROW WITH ONLY rR_2 - this works fine: line 5 all_reax.loc[temp-0.3, 'rR_2_coeff'] = rR_coeff all_reax.loc[temp-0.3, 'rR_coeff'] = 0 #NEW ROWS WITH TWO DIC AND BOTH rR all_reax.loc[temp-0.25] = all_reax.loc[temp-0.5] #eliminate CO2 all_reax.loc[temp-0.25, 'rO_coeff'] = 0 all_reax.loc[temp-0.25, 'rO_2_coeff'] = rO_coeff/2 all_reax.loc[temp-0.25, 'rO_3_coeff'] = rO_coeff/2 all_reax.loc[temp-0.24] = all_reax.loc[temp-0.5] #eliminate HCO3- all_reax.loc[temp-0.24, 'rO_2_coeff'] = 0 all_reax.loc[temp-0.24, 'rO_coeff'] = rO_coeff/2 all_reax.loc[temp-0.24, 'rO_3_coeff'] = rO_coeff/2 all_reax.loc[temp-0.23] = all_reax.loc[temp-0.5] #eliminate CO3-2 all_reax.loc[temp-0.23, 'rO_3_coeff'] = 0 all_reax.loc[temp-0.23, 'rO_2_coeff'] = rO_coeff/2 all_reax.loc[temp-0.23, 'rO_coeff'] = rO_coeff/2 all_reax.loc[temp-0.2] = all_reax.loc[temp-0.4] #NEW ROW WITH ONLY rR ELIMINATING CO2: line 6 all_reax.loc[temp-0.2, 'rO_coeff'] = 0 all_reax.loc[temp-0.2, 'rO_2_coeff'] = rO_coeff/2 all_reax.loc[temp-0.2, 'rO_3_coeff'] = rO_coeff/2 all_reax.loc[temp-0.1] = all_reax.loc[temp-0.4] #NEW ROW WITH ONLY rR ELIMINATING HCO3-: line 7 all_reax.loc[temp-0.1, 'rO_coeff'] = rO_coeff/2 all_reax.loc[temp-0.1, 'rO_2_coeff'] = 0 all_reax.loc[temp-0.1, 'rO_3_coeff'] = rO_coeff/2 all_reax.loc[temp-0.05] = all_reax.loc[temp-0.4] #NEW ROW WITH ONLY rR ELIMINATING CO3-2: line 8 all_reax.loc[temp-0.05, 'rO_coeff'] = rO_coeff/2 all_reax.loc[temp-0.05, 'rO_2_coeff'] = rO_coeff/2 all_reax.loc[temp-0.05, 'rO_3_coeff'] = 0 all_reax.loc[temp-0.04] = all_reax.loc[temp-0.3] #NEW ROW WITH ONLY rR_2 ELIMINATING CO2: line 9 all_reax.loc[temp-0.04, 'rO_coeff'] = 0 all_reax.loc[temp-0.04, 'rO_2_coeff'] = rO_coeff/2 all_reax.loc[temp-0.04, 'rO_3_coeff'] = rO_coeff/2 all_reax.loc[temp-0.03] = all_reax.loc[temp-0.3] #NEW ROW WITH ONLY rR_2 ELIMINATING HCO3-: line 10 all_reax.loc[temp-0.03, 'rO_coeff'] = rO_coeff/2 all_reax.loc[temp-0.03, 'rO_2_coeff'] = 0 all_reax.loc[temp-0.03, 'rO_3_coeff'] = rO_coeff/2 all_reax.loc[temp-0.02] = all_reax.loc[temp-0.3] #NEW ROW WITH ONLY rR_2 ELIMINATING CO3-2: line 11 all_reax.loc[temp-0.02, 'rO_coeff'] = rO_coeff/2 all_reax.loc[temp-0.02, 'rO_2_coeff'] = rO_coeff/2 all_reax.loc[temp-0.02, 'rO_3_coeff'] = 0 all_reax.loc[temp-0.01] = all_reax.loc[temp-0.5] #NEW ROW WITH ONLY CO2 and both RR all_reax.loc[temp-0.01, 'rO_coeff'] = rO_coeff all_reax.loc[temp-0.01, 'rO_2_coeff'] = 0 all_reax.loc[temp-0.01, 'rO_3_coeff'] = 0 all_reax.loc[temp-0.009] = all_reax.loc[temp-0.5] #NEW ROW WITH ONLY HCO3- and both RR all_reax.loc[temp-0.009, 'rO_coeff'] = 0 ### all_reax.loc[temp-0.009, 'rO_2_coeff'] = rO_coeff all_reax.loc[temp-0.009, 'rO_3_coeff'] = 0 all_reax.loc[temp-0.008] = all_reax.loc[temp-0.5] #NEW ROW WITH ONLY CO2 and both RR all_reax.loc[temp-0.008, 'rO_coeff'] = 0 all_reax.loc[temp-0.008, 'rO_2_coeff'] = 0 all_reax.loc[temp-0.008, 'rO_3_coeff'] = rO_coeff if rR_2 == '': all_reax.loc[temp-0.2] = all_reax.loc[temp-0.5] #NEW ROW ELIMINATING CO2 all_reax.loc[temp-0.2, 'rO_coeff'] = 0 all_reax.loc[temp-0.2, 'rO_2_coeff'] = rO_coeff/2 all_reax.loc[temp-0.2, 'rO_3_coeff'] = rO_coeff/2 all_reax.loc[temp-0.1] = all_reax.loc[temp-0.5] #NEW ROW ELIMINATING HCO3- all_reax.loc[temp-0.1, 'rO_coeff'] = rO_coeff/2 all_reax.loc[temp-0.1, 'rO_2_coeff'] = 0 all_reax.loc[temp-0.1, 'rO_3_coeff'] = rO_coeff/2 all_reax.loc[temp-0.05] = all_reax.loc[temp-0.5] #NEW ROW ELIMINATING CO3-2 all_reax.loc[temp-0.05, 'rO_coeff'] = rO_coeff/2 all_reax.loc[temp-0.05, 'rO_2_coeff'] = rO_coeff/2 all_reax.loc[temp-0.05, 'rO_3_coeff'] = 0 if rO_2 != '' and rO_3 == '': # IF THERE ARE TWO OXIDANT OPTIONS all_reax.loc[temp-0.5,'rO_coeff'] = rO_coeff/2 all_reax.loc[temp-0.5,'rO_2_coeff'] = rO_coeff/2 if rR_2 != '': #IF THERE ARE TWO REDUCTANT OPTIONS all_reax.loc[temp-0.5,'rR_coeff'] = rR_coeff/2 all_reax.loc[temp-0.5,'rR_2_coeff'] = rR_coeff/2 all_reax.loc[temp-0.4] = all_reax.loc[temp-0.5] #NEW ROW WITH ONLY rR all_reax.loc[temp-0.4, 'rR_2_coeff'] = 0 all_reax.loc[temp-0.4, 'rR_coeff'] = rR_coeff all_reax.loc[temp-0.3] = all_reax.loc[temp-0.5] #NEW ROW WITH ONLY rR_2 all_reax.loc[temp-0.3, 'rR_2_coeff'] = rR_coeff all_reax.loc[temp-0.3, 'rR_coeff'] = 0 all_reax.loc[temp-0.2] = all_reax.loc[temp-0.5] #NEW ROW WITH ONLY rO_2 all_reax.loc[temp-0.2, 'rO_2_coeff'] = rO_coeff all_reax.loc[temp-0.2, 'rO_coeff'] = 0 all_reax.loc[temp-0.1] = all_reax.loc[temp-0.5] #NEW ROW WITH ONLY rO all_reax.loc[temp-0.1, 'rO_2_coeff'] = 0 all_reax.loc[temp-0.1, 'rO_coeff'] = rO_coeff if rO_2 == '' and rO_3 == '' and rR_2 != '': # IF THERE IS ONLY ONE OXIDANT BUT TWO REDUCTANTS all_reax.loc[temp-0.5,'rR_coeff'] = rR_coeff/2 all_reax.loc[temp-0.5,'rR_2_coeff'] = rR_coeff/2 all_reax = all_reax.sort_index().reset_index(drop=True) pair_list = [] for i in range(0, len(all_reax['Temp_Pairs'])): pair_list.append([redox_pairs[all_reax.loc[i, 'Temp_Pairs'][0]], redox_pairs[all_reax.loc[i, 'Temp_Pairs'][1]]]) all_reax['pairs'] = pair_list reax = all_reax.copy(deep=True) reax.drop('Temp_Pairs', axis=1, inplace=True) reax.drop('pairs', axis=1, inplace=True) reax.reset_index(drop=True, inplace=True) for s in ['Fe', 'O', 'H','-','+']: for i in range(0,len(reax['rO'])): ### assigning temporary values to each species temp_rO_coeff = reax['rO_coeff'][i] temp_rO = element_dictionary[reax['rO'][i]][s] if reax['rO_2_coeff'][i] != '': temp_rO_2_coeff = reax['rO_2_coeff'][i] temp_rO_2 = element_dictionary[reax['rO_2'][i]][s] else: temp_rO_2_coeff = 0 temp_rO_2 = 0 if reax['rO_3_coeff'][i] != '': temp_rO_3_coeff = reax['rO_3_coeff'][i] temp_rO_3 = element_dictionary[reax['rO_3'][i]][s] else: temp_rO_3_coeff = 0 temp_rO_3 = 0 temp_rR_coeff = reax['rR_coeff'][i] temp_rR = element_dictionary[reax['rR'][i]][s] if reax['rR_2_coeff'][i] != '': temp_rR_2_coeff = reax['rR_2_coeff'][i] temp_rR_2 = element_dictionary[reax['rR_2'][i]][s] else: temp_rR_2_coeff = 0 temp_rR_2 = 0 temp_pO_coeff = reax['pO_coeff'][i] temp_pO = element_dictionary[reax['pO'][i]][s] temp_pR_coeff = reax['pR_coeff'][i] temp_pR = element_dictionary[reax['pR'][i]][s] r = 0-(temp_rO*temp_rO_coeff+temp_pR*temp_pR_coeff+temp_rO_2*temp_rO_2_coeff+temp_rO_3*temp_rO_3_coeff) o = 0-(temp_rR*temp_rR_coeff+temp_rR_2*temp_rR_2_coeff+temp_pO*temp_pO_coeff) reax.loc[i, 'r_'+s] = r reax.loc[i, 'o_'+s] = o reax['r_H'] = reax['r_H'] - 2*reax['r_O'] reax['o_H'] = reax['o_H'] - 2*reax['o_O'] reax['r_+'] = reax['r_+'] - 2*reax['r_Fe'] - reax['r_H'] reax['o_+'] = reax['o_+'] - 2*reax['o_Fe'] - reax['o_H'] reax['r_e-'] = reax['r_+'] - reax['r_-'] reax['o_e-'] = reax['o_+'] - reax['o_-'] reax.rename({'r_Fe': 'r_Fe+2', 'r_O': 'r_H2O', 'r_H': 'r_H+', 'o_Fe': 'o_Fe+2', 'o_O': 'o_H2O', 'o_H': 'o_H+'}, axis=1, inplace = True) ### MULTIPLYING SUB-REACTIONS lcm_charge = [] electrons = [] for i in range(0, len(reax['rO'])): # # for i in range(0, 10): lcm_charge = np.lcm(round(reax['r_e-'][i]), round(reax['o_e-'][i])) electrons.append(str(lcm_charge)+'e') r_multiplier = abs(lcm_charge/int(reax['r_e-'][i])) o_multiplier = abs(lcm_charge/int(reax['o_e-'][i])) for r in ['rO_coeff', 'pR_coeff', 'r_H2O', 'r_Fe+2', 'r_H+']: reax.loc[i, r] = reax.loc[i, r]*r_multiplier for o in ['rR_coeff', 'pO_coeff', 'o_H2O', 'o_Fe+2', 'o_H+']: reax.loc[i, o] = reax.loc[i, o]*o_multiplier if reax.loc[i, 'rO_2_coeff'] != '': reax.loc[i, 'rO_2_coeff'] = reax.loc[i, 'rO_2_coeff']*r_multiplier if reax.loc[i, 'rO_3_coeff'] != '': reax.loc[i, 'rO_3_coeff'] = reax.loc[i, 'rO_3_coeff']*r_multiplier if reax.loc[i, 'rR_2_coeff'] != '': reax.loc[i, 'rR_2_coeff'] = reax.loc[i, 'rR_2_coeff']*o_multiplier reax['H+'] = reax['r_H+'] + reax['o_H+'] reax['protons'] = 'H+' reax['H2O'] = reax['r_H2O'] + reax['o_H2O'] reax['water'] = 'H2O' reax['Fe+2'] = reax['r_Fe+2'] + reax['o_Fe+2'] reax['iron'] = 'Fe+2' reax.drop(columns = ['r_Fe+2', 'r_H2O', 'r_H+', 'r_-', 'r_+', 'r_e-', 'o_Fe+2', 'o_H2O', 'o_H+', 'o_-', 'o_+', 'o_e-'], axis=1, inplace=True) old_new_dict = {'SO4-2':'SO4-2', 'HSO4-':'HSO4-','FeS2':'pyrite', 'H2S':'H2S', 'H2O':'H2O', 'O2':'O2', 'H2':'H2', 'Fe2O3':'hematite','Fe3O4':'magnetite', 'CO2':'CO2', 'CH4':'METHANE', 'HCO3-':'HCO3-','CO3-2':'CO3-2', 'NH3':'NH3', 'NH4+':'NH4+', 'NO2-':'NO2-', 'HNO2':'HNO2', 'NO3-':'NO3-', 'HNO3':'HNO3', 'S':'sulfur', 'FeOOH':'goethite', 'CO':'CO', 'H+':'H+', 'Fe+2':'Fe+2', 'HS-':'HS-'} old_new_dict = {old:'start'+new+'end' for old,new in zip(old_new_dict.keys(), old_new_dict.values())} real_reax = reax.replace(old_new_dict.keys(), old_new_dict.values()) count = 0 rxn_count = [] rxn_number = [] for i in real_reax['Reaction']: if i not in rxn_count: rxn_count.append(i) rxn_number.append(real_reax['Names'][count]) name_count = 1 else: rxn_number.append(real_reax['Names'][count]+ '_'+str(name_count)+'_sub') name_count += 1 count += 1 real_reax.insert(0, 'Reaction Number', rxn_number) # # real_reax.insert(1, 'electrons', '1e') real_reax.insert(1, 'electrons', electrons) lst3 = [] #list of reaction numbers lst4 = [] #list of reactions with issues for i in range(0, len(real_reax['Reaction'])): if real_reax['Reaction'][i] not in lst3: lst3.append(real_reax['Reaction'][i]) for j in lst3: #looping through reaction numbers first_e = real_reax.loc[real_reax['Reaction'] == j]['electrons'].reset_index(drop=True)[0] for k in real_reax.loc[real_reax['Reaction'] == j]['electrons'].reset_index(drop=True): if k != first_e: if j not in lst4: lst4.append(j) for l in lst4: print(real_reax.loc[real_reax['Reaction'] == l]) real_reax.drop(labels='Reaction', axis=1, inplace = True) real_reax.drop(columns = 'Names', inplace = True) #real_reax.to_csv('template.txt', sep ='\t', header=False, index=False) file = real_reax.to_csv(sep='\t', header=False, index=False, line_terminator='\n') file = file.split("\n") # Formatting species to preserve charges while using regex package new2 = [] for i in old_new_dict.values(): i = i.replace('+','\+') i = i.replace('-','\-') new2.append(i) newlines = [] for line in file: line = line.strip() for s in new2: number = 0 match = re.findall('\W+\d+\.\d+\t'+s, line) if len(match) > 1: for m in match: pos = re.findall('\t(\d+\.\d+)', m) if len(pos) > 0: number += float(pos[0]) neg = re.findall('-\d+\.\d+', m) if len(neg) > 0: number += float(neg[0]) line = line.replace(m,'') line = line + '\t' + str(number) + '\t' + s new_match = re.findall('\t0\.0\t'+s, line) + re.findall('\t0\t'+s, line) + re.findall('\t0\t'+s+'$', line) if len(new_match) > 0: for n in new_match: line = line.replace(n, '') line = re.sub('\t+', '\t', line) line = line.replace('\t0.0\tH\t', '\t') line = line.replace('\tH\t','\tH+\t') line = line.replace('\\', '') line = line.replace('start', '') line = line.replace('end', '') line = line.strip() newlines.append(line) self.affinity_energy_reactions_raw = "\n".join(newlines) df_rxn = pd.DataFrame([x.split('\t') for x in self.affinity_energy_reactions_raw.split('\n')]) df_rxn.columns = df_rxn.columns.map(str) df_rxn = df_rxn.rename(columns={"0": "reaction_name", "1": "mol_e-_transferred_per_mol_rxn"}) df_rxn.insert(1, 'redox_pairs', all_reax['pairs']) df_rxn = df_rxn.set_index("reaction_name") df_rxn = df_rxn[df_rxn['mol_e-_transferred_per_mol_rxn'].notna()] self.affinity_energy_reactions_table = df_rxn # rxn_pairs_all = [] # n = 0 # prev_was_sub = False # for i, rxn in enumerate(self.affinity_energy_reactions_table.index.tolist()): # if "_sub" not in rxn[-4:]: # if prev_was_sub: # n += 1 # prev_was_sub = False # rxn_pairs_all.append(self.rxn_pairs_main[n]) # else: # rxn_pairs_all.append(self.rxn_pairs_main[n]) # prev_was_sub = True # self.rxn_pairs_all = rxn_pairs_all # self.affinity_energy_reactions_table.insert(0, "redox_pairs", rxn_pairs_all) prev_was_coeff = False n = 1 for col in self.affinity_energy_reactions_table.iloc[:, 2:].columns: if not prev_was_coeff: new_col_name = "coeff_"+str(n) prev_was_coeff = True else: new_col_name = "species_"+str(n) prev_was_coeff = False n += 1 self.affinity_energy_reactions_table = self.affinity_energy_reactions_table.rename(columns={col: new_col_name}) nonsub_reaction_names = [name for name in self.affinity_energy_reactions_table.index if "_sub" not in name[-4:]] if self.verbose != 0: print("{} redox reactions have been generated.".format(len(nonsub_reaction_names))) def runeq3(self, filename_3i, db, samplename=None, path_3i='/var/lib/private/gmboyer/AqEquil-dev', path_3o='/var/lib/private/gmboyer/AqEquil-dev', path_3p='/var/lib/private/gmboyer/AqEquil-dev')-
Call EQ3 on a .3i input file.
Parameters
filename_3i:str- Name of 3i input file.
db:str- Three letter code of database.
path_3i:path str, defaultcurrent working directory- Path of .3i input files.
path_3o:path str, defaultcurrent working directory- Path of .3o output files.
path_3p:path str, defaultcurrent working directory- Path of .3p pickup files.
Expand source code
def runeq3(self, filename_3i, db, samplename=None, path_3i=os.getcwd(), path_3o=os.getcwd(), path_3p=os.getcwd()): """ Call EQ3 on a .3i input file. Parameters ---------- filename_3i : str Name of 3i input file. db : str Three letter code of database. path_3i : path str, default current working directory Path of .3i input files. path_3o : path str, default current working directory Path of .3o output files. path_3p : path str, default current working directory Path of .3p pickup files. """ # get current working dir cwd = os.getcwd() if samplename == None: samplename = filename_3i[:-3] if self.verbose > 0: print('Using ' + db + ' to speciate ' + samplename) os.chdir(path_3i) # step into 3i folder args = ['/bin/csh', self.eq36co+'/runeq3', db, filename_3i] self.__run_script_and_wait(args) # run EQ3 # restore working dir os.chdir(cwd) filename_3o = filename_3i[:-1] + 'o' filename_3p = filename_3i[:-1] + 'p' try: # rename output os.rename(path_3i + '/output', path_3i + "/" + filename_3o) except: if self.verbose > 0: print('Error: EQ3 failed to produce output for ' + filename_3i) try: # move output shutil.move(path_3i + "/" + filename_3o, path_3o + "/" + filename_3o) except: if self.verbose > 0: print('Error: Could not move', filename_3o, "to", path_3o) try: # rename pickup os.rename(path_3i + '/pickup', path_3i + "/" + filename_3p) move_pickup = True except: if self.verbose > 0: print('Error: EQ3 failed to produce a pickup file for ' + filename_3i) move_pickup = False if move_pickup: try: # move pickup shutil.move(path_3i + "/" + filename_3p, path_3p + "/" + filename_3p) except: if self.verbose > 0: print('Error: Could not move', filename_3p, "to", path_3p) def runeq6(self, filename_6i, db, samplename=None, path_6i='/var/lib/private/gmboyer/AqEquil-dev', path_6o='/var/lib/private/gmboyer/AqEquil-dev', path_6p='/var/lib/private/gmboyer/AqEquil-dev')-
Call EQ6 on a .6i input file.
Parameters
filename_6i:str- Name of 6i input file.
db:str- Three letter code of database.
path_6i:path str, defaultcurrent working directory- Path of .6i input files.
path_6o:path str, defaultcurrent working directory- Path of .6o output files.
path_6p:path str, defaultcurrent working directory- Path of .6p pickup files.
Expand source code
def runeq6(self, filename_6i, db, samplename=None, path_6i=os.getcwd(), path_6o=os.getcwd(), path_6p=os.getcwd()): """ Call EQ6 on a .6i input file. Parameters ---------- filename_6i : str Name of 6i input file. db : str Three letter code of database. path_6i : path str, default current working directory Path of .6i input files. path_6o : path str, default current working directory Path of .6o output files. path_6p : path str, default current working directory Path of .6p pickup files. """ # get current working dir cwd = os.getcwd() if samplename == None: samplename = filename_6i[:-3] if self.verbose > 0: print('Using ' + db + ' to speciate ' + samplename) os.chdir(path_6i) # step into 6i folder args = ['/bin/csh', self.eq36co+'/runeq6', db, filename_6i] self.__run_script_and_wait(args) # run EQ6 # restore working dir os.chdir(cwd) filename_6o = filename_6i[:-1] + 'o' filename_6p = filename_6i[:-1] + 'p' try: # rename output os.rename(path_6i + '/output', path_6i + "/" + filename_6o) except: if self.verbose > 0: print('Error: EQ6 failed to produce output for ' + filename_6i) try: # move output shutil.move(path_6i + "/" + filename_6o, path_6o + "/" + filename_6o) except: if self.verbose > 0: print('Error: Could not move', filename_6o, "to", path_6o) try: # rename pickup os.rename(path_6i + '/pickup', path_6i + "/" + filename_6p) move_pickup = True except: if self.verbose > 0: print('Error: EQ6 failed to produce a pickup file for ' + filename_6i) move_pickup = False if move_pickup: try: # move pickup shutil.move(path_6i + "/" + filename_6p, path_6p + "/" + filename_6p) except: if self.verbose > 0: print('Error: Could not move', filename_6p, "to", path_6p) def runeqpt(self, db, extra_eqpt_output=False)-
Convert a data0 into a data1 file with EQPT.
Parameters
db:str- Three letter code of database.
extra_eqpt_output:bool, defaultFalse- Keep additional output files from EQPT? These files include eqpt_log.txt, data1f.txt, and slist.txt.
Expand source code
def runeqpt(self, db, extra_eqpt_output=False): """ Convert a data0 into a data1 file with EQPT. Parameters ---------- db : str Three letter code of database. extra_eqpt_output : bool, default False Keep additional output files from EQPT? These files include eqpt_log.txt, data1f.txt, and slist.txt. """ if os.path.exists("data0."+db) and os.path.isfile("data0."+db): pass else: self.err_handler.raise_exception(" ".join(["Error: could not locate custom database", "data0.{} in {}.".format(db, os.getcwd())])) if os.path.exists("data1."+db) and os.path.isfile("data1."+db): os.remove("data1."+db) self.__clear_eqpt_extra_output() os.environ['EQ36DA'] = os.getcwd() args = ['/bin/csh', self.eq36co+'/runeqpt', db] try: self.__run_script_and_wait(args) # run EQPT except: os.environ['EQ36DA'] = self.eq36da self.err_handler.raise_exception( "Error: EQPT failed to run on {}.".format("data0."+db)) if os.path.exists("data1") and os.path.isfile("data1"): os.rename("data1", "data1."+db) if os.path.exists("output") and os.path.isfile("output"): os.rename("output", "eqpt_log.txt") if os.path.exists("data1f") and os.path.isfile("data1f"): os.rename("data1f", "data1f.txt") if os.path.exists("slist") and os.path.isfile("slist"): os.rename("slist", "slist.txt") if os.path.exists("data1."+db) and os.path.isfile("data1."+db): if self.verbose > 0: print("Successfully created a data1."+db+" from data0."+db) else: msg = ("EQPT could not create data1."+db+" from " "data0."+db+". Check eqpt_log.txt for details.") self.err_handler.raise_exception(msg) if not extra_eqpt_output: self.__clear_eqpt_extra_output() os.environ['EQ36DA'] = self.eq36da # reset default EQ36 db path def show_redox_reactions(self, formatted=True, charge_sign_at_end=False, hide_subreactions=True, simplify=True, show=True)-
Show a table of redox reactions generated with the function
make_redox_reactions.Parameters
formatted:bool, defaultTrue- Should reactions be formatted for html output?
charge_sign_at_end:bool, defaultFalse- Display charge with sign after the number (e.g. SO4 2-)? Ignored if
formattedis False. hide_subreactions:bool, defaultTrue- Hide subreactions?
show:bool, defaultFalse- Show the table of reactions? Ignored if not run in a Jupyter notebook.
Returns
A pandas dataframe containing balanced redox reactions written in full.
Expand source code
def show_redox_reactions(self, formatted=True, charge_sign_at_end=False, hide_subreactions=True, simplify=True, show=True): """ Show a table of redox reactions generated with the function `make_redox_reactions`. Parameters ---------- formatted : bool, default True Should reactions be formatted for html output? charge_sign_at_end : bool, default False Display charge with sign after the number (e.g. SO4 2-)? Ignored if `formatted` is False. hide_subreactions : bool, default True Hide subreactions? show : bool, default False Show the table of reactions? Ignored if not run in a Jupyter notebook. Returns ---------- A pandas dataframe containing balanced redox reactions written in full. """ self.affinity_energy_formatted_reactions = copy.copy(self.affinity_energy_reactions_table.iloc[:, 0:1]) df = copy.copy(self.affinity_energy_reactions_table) if simplify: main_rxn_names = df.loc[[ind for ind in df.index if "_sub" not in ind[-4:]]].index df = df.iloc[[i-1 for i in range(0, len(df.index)) if "_sub" not in df.index[i][-4:]]] self.affinity_energy_formatted_reactions = copy.copy(df.iloc[:, 0:1]) reactions = [] for irow in range(0, df.shape[0]): redox_pair = df.loc[df.index[irow], "redox_pairs"] oxidant_1 = self.half_cell_reactions.loc[self.half_cell_reactions.index[redox_pair[0]], "Oxidant_1"] oxidant_2 = self.half_cell_reactions.loc[self.half_cell_reactions.index[redox_pair[0]], "Oxidant_2"] oxidant_3 = self.half_cell_reactions.loc[self.half_cell_reactions.index[redox_pair[0]], "Oxidant_3"] reductant_1 = self.half_cell_reactions.loc[self.half_cell_reactions.index[redox_pair[1]], "Reductant_1"] reductant_2 = self.half_cell_reactions.loc[self.half_cell_reactions.index[redox_pair[1]], "Reductant_2"] oxidants = [ox for ox in [oxidant_1, oxidant_2, oxidant_3] if str(ox) != 'nan'] reductants = [rd for rd in [reductant_1, reductant_2] if str(rd) != 'nan'] if len(oxidants) > 1: oxidant_sigma_needed = True else: oxidant_sigma_needed = False if len(reductants) > 1: reductant_sigma_needed = True else: reductant_sigma_needed = False rxn_row = df.iloc[irow, 2:] rxn = rxn_row[rxn_row.notna()] coeffs = copy.copy(rxn[::2]).tolist() names = copy.copy(rxn[1::2]).tolist() # print(rxn.name) if oxidant_sigma_needed or reductant_sigma_needed: reactant_names = [names[i] for i in range(0, len(names)) if float(coeffs[i]) < 0] for sp in reactant_names: if sp in oxidants and oxidant_sigma_needed: i = names.index(sp) names[i] = u"\u03A3"+sp if sp in reductants and reductant_sigma_needed: i = names.index(sp) names[i] = u"\u03A3"+sp react_grid = pd.DataFrame({"coeff":coeffs, "name":names}) react_grid["coeff"] = pd.to_numeric(react_grid["coeff"]) react_grid = react_grid.astype({'coeff': 'float'}) reactants = " + ".join([(str(-int(react_grid["coeff"][i]) if react_grid["coeff"][i].is_integer() else -react_grid["coeff"][i])+" " if -react_grid["coeff"][i] != 1 else "") + react_grid["name"][i] for i in range(0, len(react_grid["name"])) if react_grid["coeff"][i] < 0]) products = " + ".join([(str(int(react_grid["coeff"][i]) if react_grid["coeff"][i].is_integer() else react_grid["coeff"][i])+" " if react_grid["coeff"][i] != 1 else "") + react_grid["name"][i] for i in range(0, len(react_grid["name"])) if react_grid["coeff"][i] > 0]) if formatted: reactants = " + ".join([format_coeff(react_grid["coeff"][i]) + html_chemname_format_AqEquil(react_grid["name"][i], charge_sign_at_end=charge_sign_at_end) for i in range(0, len(react_grid["name"])) if react_grid["coeff"][i] < 0]) products = " + ".join([format_coeff(react_grid["coeff"][i]) + html_chemname_format_AqEquil(react_grid["name"][i], charge_sign_at_end=charge_sign_at_end) for i in range(0, len(react_grid["name"])) if react_grid["coeff"][i] > 0]) reaction = reactants + " = " + products reactions.append(reaction) self.affinity_energy_formatted_reactions["reaction"] = reactions[1:] + reactions[:1] # because reactions got rotated with respect to reaction names, rotate the other way self.affinity_energy_formatted_reactions.index = main_rxn_names else: reactions = [] for irow in range(0, df.shape[0]): redox_pair = df.loc[self.affinity_energy_reactions_table.index[irow], "redox_pairs"] oxidant = redox_pair[0] reductant = redox_pair[1] rxn_row = df.iloc[irow, 2:] rxn = rxn_row[rxn_row.notna()] coeffs = copy.copy(rxn[::2]).tolist() names = copy.copy(rxn[1::2]).tolist() react_grid = pd.DataFrame({"coeff":coeffs, "name":names}) react_grid["coeff"] = pd.to_numeric(react_grid["coeff"]) react_grid = react_grid.astype({'coeff': 'float'}) reactants = " + ".join([(str(-int(react_grid["coeff"][i]) if react_grid["coeff"][i].is_integer() else -react_grid["coeff"][i])+" " if -react_grid["coeff"][i] != 1 else "") + react_grid["name"][i] for i in range(0, len(react_grid["name"])) if react_grid["coeff"][i] < 0]) products = " + ".join([(str(int(react_grid["coeff"][i]) if react_grid["coeff"][i].is_integer() else react_grid["coeff"][i])+" " if react_grid["coeff"][i] != 1 else "") + react_grid["name"][i] for i in range(0, len(react_grid["name"])) if react_grid["coeff"][i] > 0]) if formatted: reactants = " + ".join([format_coeff(react_grid["coeff"][i]) + html_chemname_format_AqEquil(react_grid["name"][i], charge_sign_at_end=charge_sign_at_end) for i in range(0, len(react_grid["name"])) if react_grid["coeff"][i] < 0]) products = " + ".join([format_coeff(react_grid["coeff"][i]) + html_chemname_format_AqEquil(react_grid["name"][i], charge_sign_at_end=charge_sign_at_end) for i in range(0, len(react_grid["name"])) if react_grid["coeff"][i] > 0]) reaction = reactants + " = " + products reactions.append(reaction) self.affinity_energy_formatted_reactions["reaction"] = reactions df_out = copy.copy(self.affinity_energy_formatted_reactions) if hide_subreactions and not simplify: df_out = self.affinity_energy_formatted_reactions.loc[[ind for ind in self.affinity_energy_formatted_reactions.index if "_sub" not in ind[-4:]]] if isnotebook() and show: display(HTML(df_out.to_html(escape=False))) return df_out def speciate(self, input_filename, db='wrm', redox_flag='logfO2', redox_aux='Fe+3', default_logfO2=-6, exclude=[], suppress=[], alter_options=[], charge_balance_on='none', suppress_missing=True, verbose=1, report_filename=None, get_aq_dist=True, aq_dist_type='log_activity', get_mass_contribution=True, mass_contribution_other=True, get_mineral_sat=True, mineral_sat_type='affinity', get_redox=True, redox_type='Eh', get_ion_activity_ratios=True, get_fugacity=True, get_basis_totals=True, get_solid_solutions=True, get_affinity_energy=False, rxn_filename=None, not_limiting=['H+', 'OH-', 'H2O'], get_charge_balance=True, custom_db=False, extra_eqpt_output=False, batch_3o_filename=None, delete_generated_folders=False, custom_obigt=None)-
Calculate the equilibrium distribution of chemical species in solution. Additionally, calculate chemical affinities and energy supplies for user-specified reactions.
Parameters
input_filename:str-
User-supplied utf8-encoded comma separated value (csv) file containing sample data intended for speciation. The file must follow this format:
- the first row is a header row that must contain the names of the species to be included in the speciation calculation. There cannot be duplicate headers.
-
the second row must contain subheaders for each species in the header row. These subheaders must be taken from the following:
degC ppm ppb Suppressed Molality Molarity mg/L mg/kg.sol Alk., eq/kg.H2O Alk., eq/L Alk., eq/kg.sol Alk., mg/L CaCO3 Alk., mg/L HCO3- Log activity Log act combo Log mean act pX pH pHCl pmH pmX Hetero. equil. Homo. equil. Make non-basis -
'Temperature' must be included as a header, with 'degC' as its subheader.
- The first column must contain sample names. There cannot be duplicate sample names.
db:three letter str, default"wrm"- Three letter file extension for the desired thermodynamic database.
If
custom_dbis False, this database must be named data1.xyz (where xyz is your desired three letter extension) and located in the EQ3/6 'EQ36DA' path. Otherwise, the database must be named data0.xyz and located in your current working directory. Note that data1 files are already compiled by EQPT, while data0 files will be automatically compiled for you ifcustom_dbis True. redox_flag:str, default"O2(g)"-
Determines which column in the sample input file sets the overall redox state of the samples. Options for redox_flag include 'O2(g)', 'pe', 'Eh', 'logfO2', and 'redox aux'. The code will search your sample spreadsheet file (see
filename) for a column corresponding to the option you chose:- 'O2(g)' with a valid subheader for a gas
- 'pe' with subheader pe
- 'Eh' with subheader volts
- 'logfO2' with subheader logfO2
- 'redox aux' will search for a column corresponding to the
auxilliary basis species selected to form a redox couple with its
linked strict basis species (see
redox_aux). For example, the redox couple Fe+2/Fe+3 would require a column named Fe+3
If an appropriate header or redox data cannot be found to define redox state,
default_logfO2is used to set sample logfO2.There is a special case where dissolved oxygen can be used to impose sample redox state if
redox_flagis set to logfO2 and a column named logfO2 does not appear in your sample spreadsheet. If there is a column corresponding to dissolved oxygen measurements, logfO2 is calculated from the equilibrium reaction O2(aq) = O2(g) at the temperature and pressure of the sample using the revised Helgeson- Kirkham-Flowers (HKF) equation of state (JC Tanger IV and HC Helgeson, Am. J. Sci., 1988, 288, 19). redox_aux:default "Fe+3", optional- Ignored unless
redox_flagequals 1. Name of the auxilliary species whose reaction links it to a basis species (or another auxilliary species) such that they form a redox couple that controls sample fO2. For instance, Fe+3 is linked to Fe+2 in many supporting data files, so selectingredox_flag= 1 andredox_aux= "Fe+3" will set sample fO2 based on the Fe+2/Fe+3 redox couple. default_logfO2:float, default-6- Default value for sample logfO2 in case redox data cannot be found in the user-supplied sample spreadsheet.
exclude:listofstr, default[]- Names of columns in the user-supplied sample spreadsheet that should not be considered aqueous species. Useful for excluding columns containing sample metatadata, such as "Year" and "Location".
suppress:listofstr, default[]- Names of chemical species that will be prevented from forming in the speciation calculation.
alter_options:list, default[]- A list of lists, e.g.,
[["CaOH+", "Suppress"], ["CaCl+", "AugmentLogK", -1]]
The first element of each interior list is the name of a species.
The second element is an option to alter the species, and can be:
- Suppress : suppress the formation of the species. (See also:
suppress). - Replace : replace the species' log K value with a desired value. - AugmentLogK : augment the value of the species' log K. - AugmentG : augment the Gibbs free energy of the species by a desired value, in kcal/mol. The third element is a numeric value corresponding to the chosen option. A third element is not required for Suppress. charge_balance_on:str, default"none"- If "none", will not balance electrical charge between cations and anions in the speciation calculation. If a name of a species is supplied instead, the activity of that species will be allowed to change until charge balance is obtained. For example, charge_balance_on = "H+" will calculate what pH a sample must have to have zero net charge.
suppress_missing:bool, defaultTrue- Suppress the formation of an aqueous species if it is missing a value in the user-supplied sample spreadsheet?
verbose:int, 0, 1,or2, default1- Level determining how many messages are returned during a calculation. 2 for all messages, 1 for errors or warnings only, 0 for silent.
report_filename:str, optional- Name of the comma separated values (csv) report file generated when the calculation is complete. If this argument is not defined, a report file is not generated.
get_aq_dist:bool, defaultTrue- Calculate distributions of aqueous species?
aq_dist_type:str, default"log_activity"- Desired units of measurement for reported distributions of aqueous
species. Can be "molality", "log_molality", "log_gamma", or
"log_activity". Ignored if
get_aq_distis False. get_mass_contribution:bool, defaultTrue- Calculate basis species contributions to mass balance of aqueous species?
mass_contribution_other:bool, defaultTrue- Include an "other" species for the sake of summing percents of basis
species contributions to 100%? Ignored if
get_mass_contributionis False. get_mineral_sat:bool, defaultTrue- Calculate saturation states of pure solids?
mineral_sat_type:str, default"affinity"- Desired units of measurement for reported saturation states of pure
solids. Can be "logQoverK" or "affinity". Ignored if
get_mineral_satis False. get_redox:bool, defaultTrue- Calculate potentials of redox couples?
redox_type:str, default"Eh"- Desired units of measurement for reported redox potentials. Can be
"Eh", "pe", "logfO2", or "Ah". Ignored if
get_redoxis False. get_ion_activity_ratios:bool, defaultTrue- Calculate ion/H+ activity ratios and neutral species activities?
get_fugacity:bool, defaultTrue- Calculate gas fugacities?
get_basis_totals:bool, defaultTrue- Report total compositions of basis aqueous species?
get_solid_solutions:bool, defaultTrue- Permit the calculation of solid solutions and include them in the speciation report?
get_affinity_energy:bool, defaultFalse- Calculate affinities and energy supplies of reactions listed in a separate user-supplied file?
rxn_filename:str, optional- Name of .txt file containing reactions used to calculate affinities
and energy supplies. Ignored if
get_affinity_energyis False. not_limiting:list, default["H+", "OH-", "H2O"]- List containing names of species that are not considered limiting
when calculating energy supplies. Ignored if
get_affinity_energyis False. get_charge_balance:bool, defaultTrue- Calculate charge balance and ionic strength?
custom_db:bool, defaultFalse- Is the database defined by
dba custom user-supplied database? If this is set to True, searches for a data0.xyz file in the current working directory, where 'xyz' corresponds to the three letter code assigned todb. This data0 file is automatically converted into a machine-readable file called data1 by software called EQPT. This data1 file is then used in speciation calculations. extra_eqpt_output:bool, defaultFalse- Keep additional output files created by EQPT (see
custom_db)? Ignored ifcustom_dbis False. batch_3o_filename:str, optional- Name of rds (R object) file exported after the speciation calculation? No file will be generated if this argument is not defined.
delete_generated_folders:bool, defaultFalse- Delete the 'rxn_3i', 'rxn_3o', and 'rxn_3p' folders containing raw EQ3NR input, output, and pickup files once the speciation calculation is complete?
custom_obigt:str, optionalunlessget_affinity_energyis True- Path of custom database csv used to generate a custom data0 file. Needed for affinity and energy calculations.
Returns
speciation:objectofclass Speciation- Contains the results of the speciation calculation.
Expand source code
def speciate(self, input_filename, db="wrm", redox_flag="logfO2", redox_aux="Fe+3", default_logfO2=-6, exclude=[], suppress=[], alter_options=[], charge_balance_on="none", suppress_missing=True, verbose=1, report_filename=None, get_aq_dist=True, aq_dist_type="log_activity", get_mass_contribution=True, mass_contribution_other=True, get_mineral_sat=True, mineral_sat_type="affinity", get_redox=True, redox_type="Eh", get_ion_activity_ratios=True, get_fugacity=True, get_basis_totals=True, get_solid_solutions=True, get_affinity_energy=False, rxn_filename=None, not_limiting=["H+", "OH-", "H2O"], get_charge_balance=True, custom_db=False, extra_eqpt_output=False, batch_3o_filename=None, delete_generated_folders=False, custom_obigt=None): """ Calculate the equilibrium distribution of chemical species in solution. Additionally, calculate chemical affinities and energy supplies for user-specified reactions. Parameters ---------- input_filename : str User-supplied utf8-encoded comma separated value (csv) file containing sample data intended for speciation. The file must follow this format: - the first row is a header row that must contain the names of the species to be included in the speciation calculation. There cannot be duplicate headers. - the second row must contain subheaders for each species in the header row. These subheaders must be taken from the following: degC ppm ppb Suppressed Molality Molarity mg/L mg/kg.sol Alk., eq/kg.H2O Alk., eq/L Alk., eq/kg.sol Alk., mg/L CaCO3 Alk., mg/L HCO3- Log activity Log act combo Log mean act pX pH pHCl pmH pmX Hetero. equil. Homo. equil. Make non-basis - 'Temperature' must be included as a header, with 'degC' as its subheader. - The first column must contain sample names. There cannot be duplicate sample names. db : three letter str, default "wrm" Three letter file extension for the desired thermodynamic database. If `custom_db` is False, this database must be named data1.xyz (where xyz is your desired three letter extension) and located in the EQ3/6 'EQ36DA' path. Otherwise, the database must be named data0.xyz and located in your current working directory. Note that data1 files are already compiled by EQPT, while data0 files will be automatically compiled for you if `custom_db` is True. redox_flag : str, default "O2(g)" Determines which column in the sample input file sets the overall redox state of the samples. Options for redox_flag include 'O2(g)', 'pe', 'Eh', 'logfO2', and 'redox aux'. The code will search your sample spreadsheet file (see `filename`) for a column corresponding to the option you chose: * 'O2(g)' with a valid subheader for a gas * 'pe' with subheader pe * 'Eh' with subheader volts * 'logfO2' with subheader logfO2 * 'redox aux' will search for a column corresponding to the auxilliary basis species selected to form a redox couple with its linked strict basis species (see `redox_aux`). For example, the redox couple Fe+2/Fe+3 would require a column named Fe+3 If an appropriate header or redox data cannot be found to define redox state, `default_logfO2` is used to set sample logfO2. There is a special case where dissolved oxygen can be used to impose sample redox state if `redox_flag` is set to logfO2 and a column named logfO2 does not appear in your sample spreadsheet. If there is a column corresponding to dissolved oxygen measurements, logfO2 is calculated from the equilibrium reaction O2(aq) = O2(g) at the temperature and pressure of the sample using the revised Helgeson- Kirkham-Flowers (HKF) equation of state (JC Tanger IV and HC Helgeson, Am. J. Sci., 1988, 288, 19). redox_aux : default "Fe+3", optional Ignored unless `redox_flag` equals 1. Name of the auxilliary species whose reaction links it to a basis species (or another auxilliary species) such that they form a redox couple that controls sample fO2. For instance, Fe+3 is linked to Fe+2 in many supporting data files, so selecting `redox_flag` = 1 and `redox_aux` = "Fe+3" will set sample fO2 based on the Fe+2/Fe+3 redox couple. default_logfO2 : float, default -6 Default value for sample logfO2 in case redox data cannot be found in the user-supplied sample spreadsheet. exclude : list of str, default [] Names of columns in the user-supplied sample spreadsheet that should not be considered aqueous species. Useful for excluding columns containing sample metatadata, such as "Year" and "Location". suppress : list of str, default [] Names of chemical species that will be prevented from forming in the speciation calculation. alter_options : list, default [] A list of lists, e.g., [["CaOH+", "Suppress"], ["CaCl+", "AugmentLogK", -1]] The first element of each interior list is the name of a species. The second element is an option to alter the species, and can be: - Suppress : suppress the formation of the species. (See also: `suppress`). - Replace : replace the species' log K value with a desired value. - AugmentLogK : augment the value of the species' log K. - AugmentG : augment the Gibbs free energy of the species by a desired value, in kcal/mol. The third element is a numeric value corresponding to the chosen option. A third element is not required for Suppress. charge_balance_on : str, default "none" If "none", will not balance electrical charge between cations and anions in the speciation calculation. If a name of a species is supplied instead, the activity of that species will be allowed to change until charge balance is obtained. For example, charge_balance_on = "H+" will calculate what pH a sample must have to have zero net charge. suppress_missing : bool, default True Suppress the formation of an aqueous species if it is missing a value in the user-supplied sample spreadsheet? verbose : int, 0, 1, or 2, default 1 Level determining how many messages are returned during a calculation. 2 for all messages, 1 for errors or warnings only, 0 for silent. report_filename : str, optional Name of the comma separated values (csv) report file generated when the calculation is complete. If this argument is not defined, a report file is not generated. get_aq_dist : bool, default True Calculate distributions of aqueous species? aq_dist_type : str, default "log_activity" Desired units of measurement for reported distributions of aqueous species. Can be "molality", "log_molality", "log_gamma", or "log_activity". Ignored if `get_aq_dist` is False. get_mass_contribution : bool, default True Calculate basis species contributions to mass balance of aqueous species? mass_contribution_other : bool, default True Include an "other" species for the sake of summing percents of basis species contributions to 100%? Ignored if `get_mass_contribution` is False. get_mineral_sat : bool, default True Calculate saturation states of pure solids? mineral_sat_type : str, default "affinity" Desired units of measurement for reported saturation states of pure solids. Can be "logQoverK" or "affinity". Ignored if `get_mineral_sat` is False. get_redox : bool, default True Calculate potentials of redox couples? redox_type : str, default "Eh" Desired units of measurement for reported redox potentials. Can be "Eh", "pe", "logfO2", or "Ah". Ignored if `get_redox` is False. get_ion_activity_ratios : bool, default True Calculate ion/H+ activity ratios and neutral species activities? get_fugacity : bool, default True Calculate gas fugacities? get_basis_totals : bool, default True Report total compositions of basis aqueous species? get_solid_solutions : bool, default True Permit the calculation of solid solutions and include them in the speciation report? get_affinity_energy : bool, default False Calculate affinities and energy supplies of reactions listed in a separate user-supplied file? rxn_filename : str, optional Name of .txt file containing reactions used to calculate affinities and energy supplies. Ignored if `get_affinity_energy` is False. not_limiting : list, default ["H+", "OH-", "H2O"] List containing names of species that are not considered limiting when calculating energy supplies. Ignored if `get_affinity_energy` is False. get_charge_balance : bool, default True Calculate charge balance and ionic strength? custom_db : bool, default False Is the database defined by `db` a custom user-supplied database? If this is set to True, searches for a data0.xyz file in the current working directory, where 'xyz' corresponds to the three letter code assigned to `db`. This data0 file is automatically converted into a machine-readable file called data1 by software called EQPT. This data1 file is then used in speciation calculations. extra_eqpt_output : bool, default False Keep additional output files created by EQPT (see `custom_db`)? Ignored if `custom_db` is False. batch_3o_filename : str, optional Name of rds (R object) file exported after the speciation calculation? No file will be generated if this argument is not defined. delete_generated_folders : bool, default False Delete the 'rxn_3i', 'rxn_3o', and 'rxn_3p' folders containing raw EQ3NR input, output, and pickup files once the speciation calculation is complete? custom_obigt : str, optional unless `get_affinity_energy` is True Path of custom database csv used to generate a custom data0 file. Needed for affinity and energy calculations. Returns ------- speciation : object of class Speciation Contains the results of the speciation calculation. """ self.verbose = verbose # check input sample file for errors self._check_sample_input_file(input_filename, exclude, db, custom_db, charge_balance_on, suppress_missing) if aq_dist_type not in ["molality", "log_molality", "log_gamma", "log_activity"]: self.err_handler.raise_exception("Unrecognized aq_dist_type. Valid " "options are 'molality', 'log_molality', 'log_gamma', 'log_activity'") if mineral_sat_type not in ["logQoverK", "affinity"]: self.err_handler.raise_exception("Unrecognized mineral_sat_type. Valid " "options are 'logQoverK' or 'affinity'") if redox_type not in ["Eh", "pe", "logfO2", "Ah"]: self.err_handler.raise_exception("Unrecognized redox_type. Valid " "options are 'Eh', 'pe', 'logfO2', or 'Ah'") if redox_flag == "O2(g)" or redox_flag == -3: redox_flag = -3 elif redox_flag == "pe" or redox_flag == -2: redox_flag = -2 elif redox_flag == "Eh" or redox_flag == -1: redox_flag = -1 elif redox_flag == "logfO2" or redox_flag == 0: redox_flag = 0 elif redox_flag == "redox aux" or redox_flag == 1: redox_flag = 1 else: self.err_handler.raise_exception("Unrecognized redox flag. Valid options are 'O2(g)'" ", 'pe', 'Eh', 'logfO2', 'redox aux'") # handle batch_3o naming if batch_3o_filename != None: if ".rds" in batch_3o_filename[-4:]: batch_3o_filename = batch_3o_filename else: batch_3o_filename = "batch_3o_{}.rds".format(db) else: batch_3o_filename = ro.r("NULL") # custom obigt used for energy calculations (temporary fix to allow # custom data to be imported into CHNOSZ for energy calculations) # TODO: remove this and have code find custom data automatically. It's a tricky problem! if isinstance(custom_obigt, str): if os.path.exists(custom_obigt) and os.path.isfile(custom_obigt): pass else: err = ("Could not find custom_obigt file {}.".format(custom_obigt)) self.err_handler.raise_exception(err) else: custom_obigt = ro.r("NULL") if custom_db: # EQ3/6 cannot handle spaces in the 'EQ36DA' path name. if " " in os.getcwd(): msg = ("Error: the path to the custom database " "cannot contain spaces. The current path " "is: [ " + os.getcwd() + " ]. Remove or " "replace spaces in folder names for this " "feature. Example: [ " + os.getcwd().replace(" ", "-") + " ].") self.err_handler.raise_exception(msg) self.runeqpt(db, extra_eqpt_output) os.environ['EQ36DA'] = os.getcwd() data0_path = "data0." + db else: data0_path = self.eq36da + "/data0." + db if os.path.exists(data0_path) and os.path.isfile(data0_path): with open(data0_path) as data0: data0_lines = data0.readlines() start_index = [i+1 for i, s in enumerate(data0_lines) if s == 'temperatures\n'] end_index = [i for i, s in enumerate(data0_lines) if s == 'debye huckel a (adh)\n'] db_grids_unformatted = [i.split("pressures")[0] for i in data0_lines[start_index[0]:end_index[0]]] db_grids = [" ".join(i.split()) for i in db_grids_unformatted if i != ''] grid_temp = db_grids[0] + " " + db_grids[1] grid_press = db_grids[2] + " " + db_grids[3] grid_temp = grid_temp.split(" ") grid_press = grid_press.split(" ") try: water_model = data0_lines[1].split("model: ")[1] # extract water model from the second line of data0 file water_model = water_model.replace("\n", "") except: water_model = "SUPCRT92" # print("Water model could not be referenced from {}".format(data0_path)+"" # ". Defaulting to SUPCRT92 water model...") if(water_model not in ["SUPCRT92", "IAPWS95", "DEW"]): water_model = "SUPCRT92" # the default for EQ3/6 print("Water model given in {}".format(data0_path)+" was not " "recognized. Defaulting to SUPCRT92 water model...") else: # if a data0 file can't be found, assume default water model, 0-350 C and PSAT water_model = "SUPCRT92" grid_temp = ["0.0100", "50.0000", "100.0000", "150.0000", "200.0000", "250.0000", "300.0000", "350.0000"] grid_press = ["1.0000", "1.0000", "1.0132", "4.7572", "15.5365", "39.7365", "85.8378", "165.2113"] if get_affinity_energy: if rxn_filename == None and self.affinity_energy_reactions_raw==None: err = ("get_affinity_energy is set to True but a reaction " "file is not specified or redox reactions have not yet " "been generated with generate_redox_reactions()") self.err_handler.raise_exception(err) elif rxn_filename != None: self.__file_exists(rxn_filename, '.txt') self.affinity_energy_reactions_raw = pd.read_csv(rxn_filename, sep="\t", header=None, names=["col"+str(i) for i in range(1,50)]) load_rxn_file = True else: rxn_filename = self.affinity_energy_reactions_raw load_rxn_file = False else: rxn_filename = "" load_rxn_file=False # handle Alter/Suppress options # e.g. [["CaCl+", "AugmentLogK", -1], ["CaOH+", "Suppress"]] alter_options_dict = {} if len(alter_options) > 0: for ao in alter_options: key = ao[0] if ao[1] == "Suppress" and len(ao) == 2: ao += ["0"] alter_options_dict[key] = convert_to_RVector(list(ao[1:])) alter_options = ro.ListVector(alter_options_dict) # preprocess for EQ3 using R scripts self.__capture_r_output() r_prescript = pkg_resources.resource_string( __name__, 'preprocess_for_EQ3.r').decode("utf-8") ro.r(r_prescript) df_input_processed = ro.r.preprocess(input_filename=input_filename, exclude=convert_to_RVector( exclude), redox_flag=redox_flag, redox_aux=redox_aux, default_logfO2=default_logfO2, charge_balance_on=charge_balance_on, suppress_missing=suppress_missing, suppress=convert_to_RVector( suppress), alter_options=alter_options, water_model=water_model, grid_temp=convert_to_RVector(grid_temp), grid_press=convert_to_RVector(grid_press), get_solid_solutions=get_solid_solutions, verbose=self.verbose) for line in self.stderr: print(line) self.df_input_processed = ro.conversion.rpy2py(df_input_processed) # run EQ3 on each input file cwd = os.getcwd() self.__mk_check_del_directory('rxn_3o') self.__mk_check_del_directory('rxn_3p') files_3i, files_3i_paths = self.__read_inputs('3i', 'rxn_3i') input_dir = cwd + "/rxn_3i/" output_dir = cwd + "/rxn_3o/" pickup_dir = cwd + "/rxn_3p/" for file in files_3i: samplename = self.df_input_processed.loc[file[:-3], "Sample"] self.runeq3(filename_3i=file, db=db, samplename=samplename, path_3i=input_dir, path_3o=output_dir, path_3p=pickup_dir) if custom_db: os.environ['EQ36DA'] = self.eq36da files_3o = [file+".3o" for file in self.df_input_processed.index] df_input_processed_names = convert_to_RVector(list(self.df_input_processed.columns)) # mine output self.__capture_r_output() r_3o_mine = pkg_resources.resource_string( __name__, '3o_mine.r').decode("utf-8") ro.r(r_3o_mine) batch_3o = ro.r.main_3o_mine( files_3o=convert_to_RVector(files_3o), input_filename=input_filename, rxn_filename=rxn_filename, get_aq_dist=get_aq_dist, aq_dist_type=aq_dist_type, get_mass_contribution=get_mass_contribution, mass_contribution_other=mass_contribution_other, get_mineral_sat=get_mineral_sat, mineral_sat_type=mineral_sat_type, get_redox=get_redox, redox_type=redox_type, get_charge_balance=get_charge_balance, get_ion_activity_ratios=get_ion_activity_ratios, get_fugacity=get_fugacity, get_basis_totals=get_basis_totals, get_solid_solutions=get_solid_solutions, get_affinity_energy=get_affinity_energy, load_rxn_file=load_rxn_file, not_limiting=convert_to_RVector(not_limiting), batch_3o_filename=batch_3o_filename, df_input_processed=ro.conversion.py2rpy(self.df_input_processed), # New rpy2 py2rpy2 conversion might not need the workaround below. # The old note regarding deprecated pandas2ri is shown below... # OLD NOTE: # Needed for keeping symbols in column names after porting # df_input_processed in the line above. Some kind of check.names # option for pandas2ri.py2ri would be nice. Workaround: df_input_processed_names=df_input_processed_names, custom_obigt=custom_obigt, verbose=self.verbose, ) for line in self.stderr: print(line) if len(batch_3o) == 0: self.err_handler.raise_exception("Could not compile a speciation report. This is " "likely because errors occurred during " "the speciation calculation.") return if get_mass_contribution: mass_contribution = ro.conversion.rpy2py(batch_3o.rx2('mass_contribution')) df_report = ro.conversion.rpy2py(batch_3o.rx2('report')) #df_input = ro.conversion.rpy2py(batch_3o.rx2('input')) report_divs = batch_3o.rx2('report_divs') input_cols = list(report_divs.rx2('input')) df_input = df_report[input_cols].copy() # add a pressure column to df_input df_input["Pressure_bar"] = pd.Series(dtype='float') sample_data = batch_3o.rx2('sample_data') for sample in sample_data: df_input.loc[str(sample.rx2('name')[0]), "Pressure_bar"] = float(sample.rx2('pressure')[0]) report_divs[0] = convert_to_RVector(input_cols + ["Pressure_bar"]) # handle headers and subheaders of input section headers = [col.split("_")[0] for col in list(df_input.columns)] headers = ["pH" if header == "H+" else header for header in headers] headers = [header+"_(input)" if header not in ["Temperature", "logfO2", "Pressure"]+exclude else header for header in headers] report_divs[0] = convert_to_RVector(headers) # modify headers in the 'input' section, report_divs[0] subheaders = [subheader[1] if len(subheader) > 1 else "" for subheader in [ col.split("_") for col in list(df_input.columns)]] multicolumns = pd.MultiIndex.from_arrays( [headers, subheaders], names=['Sample', '']) df_input.columns = multicolumns df_join = df_input if get_aq_dist: aq_distribution_cols = list(report_divs.rx2('aq_distribution')) df_aq_distribution = df_report[aq_distribution_cols] df_aq_distribution = df_aq_distribution.apply(pd.to_numeric, errors='coerce') # create a pH column from H+ df_aq_distribution["pH"] = np.nan # pH values are assigned when sample data is assembled later # handle headers of aq_distribution section headers = df_aq_distribution.columns subheaders = [aq_dist_type]*(len(headers)-1) # -1 because the last column will have subheader pH (see next line) subheaders = subheaders + ["pH"] multicolumns = pd.MultiIndex.from_arrays( [headers, subheaders], names=['Sample', '']) df_aq_distribution.columns = multicolumns # ensure final pH column is included in report_divs aq_distribution section aq_dist_indx = report_divs.names.index("aq_distribution") report_divs[aq_dist_indx] = convert_to_RVector(list(headers)) df_join = df_join.join(df_aq_distribution) if get_mineral_sat: mineral_sat_cols = list(report_divs.rx2('mineral_sat')) mineral_sat_cols = [col for col in mineral_sat_cols if col != "df"] # TO DO: why is df appearing in mineral sat cols and redox sections? df_mineral_sat = df_report[mineral_sat_cols] df_mineral_sat = df_mineral_sat.apply(pd.to_numeric, errors='coerce') # handle headers of df_mineral_sat section if mineral_sat_type == "affinity": mineral_sat_unit = "affinity_kcal" elif mineral_sat_type == "logQoverK": mineral_sat_unit = "logQ/K" else: self.err_handler.raise_exception( "mineral_sat_type must be either 'affinity' or 'logQoverK'") headers = df_mineral_sat.columns subheaders = [mineral_sat_unit]*len(headers) multicolumns = pd.MultiIndex.from_arrays( [headers, subheaders], names=['Sample', '']) df_mineral_sat.columns = multicolumns df_join = df_join.join(df_mineral_sat) if get_redox: redox_cols = list(report_divs.rx2('redox')) redox_cols = [col for col in redox_cols if col != "df"] # TO DO: why is df appearing in mineral sat cols and redox sections? df_redox = df_report[redox_cols] df_redox = df_redox.apply(pd.to_numeric, errors='coerce') # handle headers of df_redox section if redox_type == "Eh": redox_unit = "Eh_volts" elif redox_type == "pe": redox_unit = "pe" elif redox_type == "logfO2": redox_unit = "logfO2" elif redox_type == "Ah": redox_unit = "Ah_kcal" else: self.err_handler.raise_exception( "redox_type must be either 'Eh', 'pe', 'logfO2', or 'Ah'") headers = df_redox.columns subheaders = [redox_unit]*len(headers) multicolumns = pd.MultiIndex.from_arrays( [headers, subheaders], names=['Sample', '']) df_redox.columns = multicolumns df_join = df_join.join(df_redox) if get_charge_balance: charge_balance_cols = list(report_divs.rx2('charge_balance')) df_charge_balance = df_report[charge_balance_cols] df_charge_balance = df_charge_balance.apply(pd.to_numeric, errors='coerce') # handle headers of df_charge_balance section headers = df_charge_balance.columns subheaders = ["%"]*2 + ['eq/kg.H2O', 'molality'] + \ ['eq/kg.H2O']*4 + ['molality'] multicolumns = pd.MultiIndex.from_arrays( [headers, subheaders], names=['Sample', '']) df_charge_balance.columns = multicolumns df_join = df_join.join(df_charge_balance) if get_ion_activity_ratios: ion_activity_ratio_cols = list(report_divs.rx2('ion_activity_ratios')) df_ion_activity_ratios = df_report[ion_activity_ratio_cols] df_ion_activity_ratios = df_ion_activity_ratios.apply(pd.to_numeric, errors='coerce') # handle headers of df_ion_activity_ratios section headers = df_ion_activity_ratios.columns subheaders = ["Log ion-H+ activity ratio"]*len(headers) multicolumns = pd.MultiIndex.from_arrays( [headers, subheaders], names=['Sample', '']) df_ion_activity_ratios.columns = multicolumns df_join = df_join.join(df_ion_activity_ratios) if get_fugacity: fugacity_cols = list(report_divs.rx2('fugacity')) df_fugacity = df_report[fugacity_cols] df_fugacity = df_fugacity.apply(pd.to_numeric, errors='coerce') # handle headers of fugacity section headers = df_fugacity.columns subheaders = ["log_fugacity"]*len(headers) multicolumns = pd.MultiIndex.from_arrays( [headers, subheaders], names=['Sample', '']) df_fugacity.columns = multicolumns df_join = df_join.join(df_fugacity) if get_basis_totals: sc_cols = list(report_divs.rx2('basis_totals')) df_sc = df_report[sc_cols] df_sc = df_sc.apply(pd.to_numeric, errors='coerce') # handle headers of basis_totals section headers = df_sc.columns subheaders = ["molality"]*(len(headers)) multicolumns = pd.MultiIndex.from_arrays( [headers, subheaders], names=['Sample', '']) df_sc.columns = multicolumns df_join = df_join.join(df_sc) if get_affinity_energy: affinity_cols = list(report_divs.rx2('affinity')) energy_cols = list(report_divs.rx2('energy')) df_affinity = df_report[affinity_cols] df_energy = df_report[energy_cols] df_affinity = df_affinity.apply(pd.to_numeric, errors='coerce') df_energy = df_energy.apply(pd.to_numeric, errors='coerce') # handle headers of df_affinity section headers = df_affinity.columns subheaders = ['cal/mol e-']*len(headers) multicolumns = pd.MultiIndex.from_arrays( [headers, subheaders], names=['Sample', '']) df_affinity.columns = multicolumns # handle headers of df_energy section headers = df_energy.columns subheaders = ['cal/kg.H2O']*len(headers) multicolumns = pd.MultiIndex.from_arrays( [headers, subheaders], names=['Sample', '']) df_energy.columns = multicolumns df_join = df_join.join(df_affinity) df_join = df_join.join(df_energy) out_dict = {'sample_data': {}, 'report': df_join, 'input': df_input, 'report_divs': report_divs} if get_mass_contribution: out_dict['mass_contribution'] = mass_contribution sample_data = batch_3o.rx2('sample_data') # assemble sample data for i, sample in enumerate(sample_data): dict_sample_data = { "filename": str(sample.rx2('filename')[0]), "name": str(sample.rx2('name')[0]), "temperature": float(sample.rx2('temperature')[0]), "pressure": float(sample.rx2('pressure')[0]), "logact_H2O": float(sample.rx2('logact_H2O')[0]), "H2O_density": float(sample.rx2('H2O_density')[0]), "H2O_molality": float(sample.rx2('H2O_molality')[0]), "H2O_log_molality": float(sample.rx2('H2O_log_molality')[0]), } if get_aq_dist: sample_aq_dist = ro.conversion.rpy2py(sample.rx2('aq_distribution')) sample_aq_dist = sample_aq_dist.apply(pd.to_numeric, errors='coerce') sample_pH = -sample_aq_dist.loc["H+", "log_activity"] out_dict["report"].loc[str(sample.rx2('name')[0]), "pH"] = sample_pH dict_sample_data.update({"aq_distribution": sample_aq_dist}) if get_mass_contribution: sample_mass_contribution = mass_contribution[mass_contribution["sample"] == sample.rx2('name')[0]] dict_sample_data.update( {"mass_contribution": sample_mass_contribution}) if get_mineral_sat: dict_sample_data.update( {"mineral_sat": ro.conversion.rpy2py(sample.rx2('mineral_sat')).apply(pd.to_numeric, errors='coerce')}) # replace sample mineral_sat entry with None if there is no mineral saturation data. if(len(dict_sample_data['mineral_sat'].index) == 1 and dict_sample_data['mineral_sat'].index[0] == 'None'): dict_sample_data['mineral_sat'] = None if get_redox: dict_sample_data.update( {"redox": ro.conversion.rpy2py(sample.rx2('redox')).apply(pd.to_numeric, errors='coerce')}) if get_charge_balance: dict_sample_data.update({"charge_balance": df_charge_balance.loc[sample.rx2('name')[0], :]}) if get_ion_activity_ratios: try: dict_sample_data.update( {"ion_activity_ratios": ro.conversion.rpy2py(sample.rx2('ion_activity_ratios'))}) except: dict_sample_data['ion_activity_ratios'] = None if get_fugacity: dict_sample_data.update( {"fugacity": ro.conversion.rpy2py(sample.rx2('fugacity')).apply(pd.to_numeric, errors='coerce')}) # replace sample fugacity entry with None if there is no fugacity data. if(len(dict_sample_data['fugacity'].index) == 1 and dict_sample_data['fugacity'].index[0] == 'None'): dict_sample_data['fugacity'] = None else: dict_sample_data["fugacity"]["fugacity"] = 10**dict_sample_data["fugacity"]["log_fugacity"] if get_basis_totals: sc_dist = ro.conversion.rpy2py(sample.rx2('basis_totals')) sc_dist = sc_dist.apply(pd.to_numeric, errors='coerce') dict_sample_data.update({"basis_totals": sc_dist}) if get_solid_solutions: sample_solid_solutions = batch_3o.rx2["sample_data"].rx2[sample.rx2('name')[0]].rx2["solid_solutions"] if not type(sample_solid_solutions.names) == rpy2.rinterface_lib.sexp.NULLType: ss_df_list = [] for ss in list(sample_solid_solutions.names): df_ss_ideal = sample_solid_solutions.rx2[ss].rx2["ideal solution"] df_ss_mineral = sample_solid_solutions.rx2[ss].rx2["mineral"] df_merged = pd.merge(df_ss_mineral, df_ss_ideal, left_on='mineral', right_on='component', how='left') df_merged.insert(0, 'solid solution', ss) del df_merged['component'] ss_df_list.append(df_merged) dict_sample_data.update( {"solid_solutions": pd.concat(ss_df_list)}) if get_affinity_energy: dict_sample_data.update({"affinity_energy_raw": ro.conversion.rpy2py( sample.rx2('affinity_energy_raw'))}) dict_sample_data.update( {"affinity_energy": ro.conversion.rpy2py(sample.rx2('affinity_energy'))}) out_dict["sample_data"].update( {sample_data.names[i]: dict_sample_data}) out_dict.update({"batch_3o": batch_3o}) out_dict.update({"water_model":water_model, "grid_temp":grid_temp, "grid_press":grid_press}) speciation = Speciation(out_dict, hide_traceback=self.hide_traceback) if get_affinity_energy: speciation.half_cell_reactions = self.half_cell_reactions speciation.affinity_energy_reactions_table = self.affinity_energy_reactions_table speciation.affinity_energy_formatted_reactions = self.affinity_energy_formatted_reactions speciation.show_redox_reactions = self.show_redox_reactions if report_filename != None: if ".csv" in report_filename[-4:]: out_dict["report"].to_csv(report_filename) else: out_dict["report"].to_csv(report_filename+".csv") if delete_generated_folders: self._delete_rxn_folders() if self.verbose > 0: print("Finished!") return speciation
class Error_Handler (clean=True)-
Handles how errors are printed in Jupyter notebooks. By default, errors that are handled by AqEquil are printed with an error message, but no traceback. Errors that are not handled by AqEquil, such as those thrown if the user encounters a bug, will display a full traceback.
If the error handler prints an error message without traceback, all future errors regardless of origin will be shown without traceback until the notebook kernel is restarted.
Attributes
clean:bool- Report exceptions without traceback? If True, only the error message is shown. If False, the entire error message, including traceback, is shown. Ignored if AqEquil is not being run in a Jupyter notebook.
Expand source code
class Error_Handler: """ Handles how errors are printed in Jupyter notebooks. By default, errors that are handled by AqEquil are printed with an error message, but no traceback. Errors that are not handled by AqEquil, such as those thrown if the user encounters a bug, will display a full traceback. If the error handler prints an error message without traceback, all future errors regardless of origin will be shown without traceback until the notebook kernel is restarted. Attributes ---------- clean : bool Report exceptions without traceback? If True, only the error message is shown. If False, the entire error message, including traceback, is shown. Ignored if AqEquil is not being run in a Jupyter notebook. """ def __init__(self, clean=True): self.clean = clean # bool: hide traceback? pass @staticmethod def hide_traceback(exc_tuple=None, filename=None, tb_offset=None, exception_only=False, running_compiled_code=False): """ Return a modified ipython showtraceback function that does not display traceback when encountering an error. """ ipython = get_ipython() etype, value, tb = sys.exc_info() value.__cause__ = None # suppress chained exceptions return ipython._showtraceback(etype, value, ipython.InteractiveTB.get_exception_only(etype, value)) def raise_exception(self, msg): """ Raise an exception that displays the error message without traceback. This happens only when the exception is predicted by the AqEquil package (e.g., for common user errors). """ if self.clean and isnotebook(): ipython = get_ipython() ipython.showtraceback = self.hide_traceback raise Exception(msg)Static methods
def hide_traceback(exc_tuple=None, filename=None, tb_offset=None, exception_only=False, running_compiled_code=False)-
Return a modified ipython showtraceback function that does not display traceback when encountering an error.
Expand source code
@staticmethod def hide_traceback(exc_tuple=None, filename=None, tb_offset=None, exception_only=False, running_compiled_code=False): """ Return a modified ipython showtraceback function that does not display traceback when encountering an error. """ ipython = get_ipython() etype, value, tb = sys.exc_info() value.__cause__ = None # suppress chained exceptions return ipython._showtraceback(etype, value, ipython.InteractiveTB.get_exception_only(etype, value))
Methods
def raise_exception(self, msg)-
Raise an exception that displays the error message without traceback. This happens only when the exception is predicted by the AqEquil package (e.g., for common user errors).
Expand source code
def raise_exception(self, msg): """ Raise an exception that displays the error message without traceback. This happens only when the exception is predicted by the AqEquil package (e.g., for common user errors). """ if self.clean and isnotebook(): ipython = get_ipython() ipython.showtraceback = self.hide_traceback raise Exception(msg)
class Speciation (args, hide_traceback=True)-
Stores the output of a speciation calculation.
Attributes
input:pd.DataFrame- Pandas dataframe containing user-supplied sample chemistry data.
mass_contribution:pd.DataFrame- Pandas dataframe containing basis species contributions to mass balance of aqueous species.
batch_3o:rpy2 ListVector- An rpy2 ListVector (R object) containing speciation results, in case analysis in R is preferred.
report:pd.DataFrame- Pandas dataframe reporting major results of speciation calculation in across all samples.
report_divs:rpy2 ListVector- An rpy2 ListVector of column names within the different sections of the speciation report.
sample_data:dict- Dictionary with sample names as keys and speciation results as values.
Expand source code
class Speciation(object): """ Stores the output of a speciation calculation. Attributes ---------- input : pd.DataFrame Pandas dataframe containing user-supplied sample chemistry data. mass_contribution : pd.DataFrame Pandas dataframe containing basis species contributions to mass balance of aqueous species. batch_3o : rpy2 ListVector An rpy2 ListVector (R object) containing speciation results, in case analysis in R is preferred. report : pd.DataFrame Pandas dataframe reporting major results of speciation calculation in across all samples. report_divs : rpy2 ListVector An rpy2 ListVector of column names within the different sections of the speciation report. sample_data : dict Dictionary with sample names as keys and speciation results as values. """ def __init__(self, args, hide_traceback=True): self.err_handler = Error_Handler(clean=hide_traceback) self.reactions_for_plotting = None # stores formatted reactions for plotting results of affinity and energy supply calculations for k in args: setattr(self, k, args[k]) def __getitem__(self, item): return getattr(self, item) @staticmethod def __unique(seq): """ Provide a sequence, get a list of non-repeating elements in the same order. """ seen = set() seen_add = seen.add return [x for x in seq if not (x in seen or seen_add(x))] def save(self, filename, messages=True): """ Save the speciation as a '.speciation' file to your current working directory. This file can be loaded with `AqEquil.load(filename)`. Parameters ---------- filename : str The desired name of the file. messages : str Print a message confirming the save? """ if filename[-11:] != '.speciation': filename = filename + '.speciation' with open(filename, 'wb') as handle: dill.dump(self, handle, protocol=dill.HIGHEST_PROTOCOL) if messages: print("Saved as '{}'".format(filename)) @staticmethod def __save_figure(fig, save_as, save_format, save_scale, plot_width, plot_height, ppi): if isinstance(save_format, str) and save_format not in ['png', 'jpg', 'jpeg', 'webp', 'svg', 'pdf', 'eps', 'json', 'html']: self.err_handler.raise_exception("{}".format(save_format)+" is an unrecognized " "save format. Supported formats include 'png', " "'jpg', 'jpeg', 'webp', 'svg', 'pdf', 'eps', " "'json', or 'html'") if isinstance(save_format, str): if not isinstance(save_as, str): save_as = "newplot" if save_format=="html": fig.write_html(save_as+".html") print("Saved figure as {}".format(save_as)+".html") save_format = 'png' elif save_format in ['pdf', 'eps', 'json']: pio.write_image(fig, save_as+"."+save_format, format=save_format, scale=save_scale, width=plot_width*ppi, height=plot_height*ppi) print("Saved figure as {}".format(save_as)+"."+save_format) save_format = "png" else: pio.write_image(fig, save_as+"."+save_format, format=save_format, scale=save_scale, width=plot_width*ppi, height=plot_height*ppi) print("Saved figure as {}".format(save_as)+"."+save_format) else: save_format = "png" return save_as, save_format @staticmethod def __get_unit_info(subheader): unit_name_dict = { "pH" : ("", "pH"), "ppm" : ("", "ppm"), "ppb" : ("", "ppb"), "mg/L" : ("", "mg/L"), "degC" : ("temperature", "°C"), "log_molality" : ("log molality", "log(mol/kg)"), "Molality" : ("molality", "mol/kg"), "molality" : ("molality", "mol/kg"), "molal" : ("molality", "mol/kg"), "log_activity" : ("log activity", ""), "Log activity" : ("log activity", ""), "mg/kg.sol" : ("", "mg solute per kg solution"), "Alk., eq/kg.H2O" : ("alkalinity", "eq/kg"), "Alk., eq/L" : ("alkalinity", "eq/L"), "Alk., eq/kg.sol" : ("alkalinity", "eq/kg solution"), "Alk., mg/L CaCO3" : ("alkalinity", "mg/L CaCO3"), "Alk., mg/L HCO3-" : ("alkalinity", "mg/L HCO3-"), "pX" : ("-(log activity)", "-log(mol/kg)"), "activity" : ("activity", ""), "log_gamma" : ("log gamma", "log(kg/mol)"), "gamma" : ("gamma", "kg/mol"), "affinity_kcal" : ("affinity", "kcal/mol"), "%" : ("", "%"), "Eh_volts" : ("Eh", "volts"), "eq/kg.H2O" : ("charge", "eq/kg"), "logfO2" : ("", ""), "cal/mol e-" : ("affinity", "cal/mol e-"), "cal/kg.H2O" : ("energy supply", "cal/kg H2O"), "Log ion-H+ activity ratio" : ("Log ion-H+ activity ratio", ""), "log_fugacity" : ("log fugacity", "log(bar)"), "fugacity" : ("fugacity", "bar"), "bar" : ("", "bar"), } out = unit_name_dict.get(subheader) return out[0], out[1] def lookup(self, col=None): """ Look up desired columns in the speciation report. Parameters ---------- col : str or list of str Leave blank to get a list of section names in the report: ```speciation.lookup()``` Provide the name of a section to look up the names of columns in that section of the report: ```speciation.lookup("aq_distribution")``` Provide a column name (or a list of column names) to retrieve the column from the report: ```speciation.lookup(["Temperature", "O2"])``` Returns ---------- Pandas dataframe or list of str If a column name (or list of column names) is provided, returns the speciation report with only the desired column(s). Otherwise returns a list of section names (if no arguments are provided), or a list of columns in a section (if a section name is provided). """ names_length = len(self.report_divs.names) if col==None and names_length>0: return list(self.report_divs.names) if names_length>0: if col in list(self.report_divs.names): return list(self.report_divs.rx2(col)) if isinstance(col, str): col = [col] return self.report.iloc[:, self.report.columns.get_level_values(0).isin(set(col))] def __convert_aq_units_to_log_friendly(self, species, rows): col_data = self.lookup(species) col_data = col_data.loc[rows] if col_data.columns.get_level_values(1) == 'log_activity': y = [10**float(s[0]) if s[0] != 'NA' else float("nan") for s in col_data.values.tolist()] out_unit = 'activity' elif col_data.columns.get_level_values(1) == 'log_molality': y = [10**float(s[0]) if s[0] != 'NA' else float("nan") for s in col_data.values.tolist()] out_unit = 'molality' elif col_data.columns.get_level_values(1) == 'log_gamma': y = [10**float(s[0]) if s[0] != 'NA' else float("nan") for s in col_data.values.tolist()] out_unit = 'gamma' elif col_data.columns.get_level_values(1) == 'log_fugacity': y = [10**float(s[0]) if s[0] != 'NA' else float("nan") for s in col_data.values.tolist()] out_unit = 'fugacity' else: y = [float(s[0]) if s[0] != 'NA' else float("nan") for s in col_data.values.tolist()] out_unit = col_data.columns.get_level_values(1)[0] return y, out_unit def plot_mineral_saturation(self, sample_name, title=None, mineral_sat_type="affinity", plot_width=4, plot_height=3, ppi=122, colors=["blue", "orange"], save_as=None, save_format=None, save_scale=1, interactive=True, hide_traceback=True): """ Vizualize mineral saturation states in a sample as a bar plot. Parameters ---------- sample_name : str Name of the sample to plot. title : str, optional Title of the plot. mineral_sat_type : str, default "affinity" Metric for mineral saturation state to plot. Can be "affinity" or "logQoverK". colors : list of two str, default ["blue", "orange"] Sets the color of the bars representing supersaturated and undersaturated states, respectively. save_as : str, optional Provide a filename to save this figure. Filetype of saved figure is determined by `save_format`. Note: interactive plots can be saved by clicking the 'Download plot' button in the plot's toolbar. save_format : str, default "png" Desired format of saved or downloaded figure. Can be 'png', 'jpg', 'jpeg', 'webp', 'svg', 'pdf', 'eps', 'json', or 'html'. If 'html', an interactive plot will be saved. Only 'png', 'svg', 'jpeg', and 'webp' can be downloaded with the 'download as' button in the toolbar of an interactive plot. save_scale : numeric, default 1 Multiply title/legend/axis/canvas sizes by this factor when saving the figure. interactive : bool, default True Return an interactive plot if True or a static plot if False. """ if sample_name not in self.report.index: msg = ("Could not find '{}'".format(sample_name)+" among sample " "names in the speciation report. Sample names include " "{}".format(list(self.report.index))) err_handler.raise_exception(msg) if isinstance(self.sample_data[sample_name].get('mineral_sat', None), pd.DataFrame): mineral_data = self.sample_data[sample_name]['mineral_sat'][mineral_sat_type].astype(float).sort_values(ascending=False) x = mineral_data.index else: msg = ("This sample does not have mineral saturation state data." "To generate this data, ensure get_mineral_sat=True when " "running speciate(), or ensure this sample has " "mineral-forming basis species.") self.err_handler.raise_exception(msg) color_list = [colors[0] if m >= 0 else colors[1] for m in mineral_data] if mineral_sat_type == "affinity": ylabel = 'affinity, kcal/mol' if mineral_sat_type == "logQoverK": ylabel = 'logQ/K' if title==None: title = sample_name + " mineral saturation index" df = pd.DataFrame(mineral_data) fig = px.bar(df, x=df.index, y="affinity", height=plot_height*ppi, width=plot_width*ppi, labels={'affinity': ylabel}, template="simple_white") fig.update_traces(hovertemplate = "%{x} <br>"+ylabel+": %{y}", marker_color=color_list) fig.update_layout(xaxis_tickangle=-45, xaxis_title=None, title={'text':title, 'x':0.5, 'xanchor':'center'}, margin={"t":40}, xaxis={'fixedrange':True}, yaxis={'fixedrange':True, 'exponentformat':'power'}) save_as, save_format = self.__save_figure(fig, save_as, save_format, save_scale, plot_width, plot_height, ppi) config = {'displaylogo': False, 'modeBarButtonsToRemove': ['zoom2d', 'pan2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d', 'toggleSpikelines'], 'toImageButtonOptions': { 'format': save_format, # one of png, svg, jpeg, webp 'filename': save_as, 'height': plot_height*ppi, 'width': plot_width*ppi, 'scale': save_scale, }, } if not interactive: config['staticPlot'] = True fig.show(config=config) def barplot(self, y, title=None, convert_log=True, show_missing=True, plot_width=4, plot_height=3, ppi=122, colormap="WORM", save_as=None, save_format=None, save_scale=1, interactive=True): """ Show a bar plot to vizualize one or more variables across all samples. Parameters ---------- y : str or list of str Name (or list of names) of the variables to plot. Valid variables are columns in the speciation report. title : str, optional Title of the plot. convert_log : bool, default True Convert units "log_activity", "log_molality", "log_gamma", and "log_fugacity" to "activity", "molality", "gamma", and "fugacity", respectively? show_missing : bool, default True Show samples that do not have bars? plot_width, plot_height : numeric, default 4 by 3 Width and height of the plot, in inches. ppi : numeric, default 122 Pixels per inch. Along with `plot_width` and `plot_height`, determines the size of interactive plots. colormap : str, default "WORM" Name of the colormap to color the scatterpoints. Accepts "WORM", "colorblind", or matplotlib colormaps. See https://matplotlib.org/stable/tutorials/colors/colormaps.html The "colorblind" colormap is referenced from Wong, B. Points of view: Color blindness. Nat Methods 8, 441 (2011). https://doi.org/10.1038/nmeth.1618 save_as : str, optional Provide a filename to save this figure. Filetype of saved figure is determined by `save_format`. Note: interactive plots can be saved by clicking the 'Download plot' button in the plot's toolbar. save_format : str, default "png" Desired format of saved or downloaded figure. Can be 'png', 'jpg', 'jpeg', 'webp', 'svg', 'pdf', 'eps', 'json', or 'html'. If 'html', an interactive plot will be saved. Only 'png', 'svg', 'jpeg', and 'webp' can be downloaded with the 'download as' button in the toolbar of an interactive plot. save_scale : numeric, default 1 Multiply title/legend/axis/canvas sizes by this factor when saving the figure. interactive : bool, default True Return an interactive plot if True or a static plot if False. """ if not isinstance(y, list): y = [y] colors = get_colors(colormap, len(y)) # convert rgba to hex colors = [matplotlib.colors.rgb2hex(c) for c in colors] # map each species to its color, e.g., # {'CO2': '#000000', 'HCO3-': '#1699d3', 'Other': '#736ca8'} dict_species_color = {sp:color for sp,color in zip(y, colors)} # html format color dict key names dict_species_color = {html_chemname_format_AqEquil(k):v for k,v in dict_species_color.items()} y_cols = self.lookup(y) if not show_missing: y_cols = y_cols.dropna(how='all') # this df will keep subheaders x = y_cols.index # names of samples df = self.lookup(["name"]+y).copy() if not show_missing: df = df.dropna(how='all') # this df will lose subheaders (flattened) df.loc[:, "name"] = df.index df.columns = df.columns.get_level_values(0) for i, yi in enumerate(y): y_col = y_cols.iloc[:, y_cols.columns.get_level_values(0)==yi] try: subheader = y_col.columns.get_level_values(1)[0] except: msg = ("Could not find '{}' ".format(yi)+"in the speciation " "report. Available variables include " "{}".format(list(set(self.report.columns.get_level_values(0))))) self.err_handler.raise_exception(msg) try: unit_type, unit = self.__get_unit_info(subheader) except: unit_type = "" unit = "" try: y_vals = [float(y0[0]) if y0[0] != 'NA' else float("nan") for y0 in y_col.values.tolist()] except: msg = ("One or more the values belonging to " "'{}' are non-numeric and cannot be plotted.".format(y_col.columns.get_level_values(0)[0])) self.err_handler.raise_exception(msg) if convert_log and [abs(y0) for y0 in y_vals] != y_vals: # convert to bar-friendly units if possible if subheader in ["log_activity", "log_molality", "log_gamma", "log_fugacity"]: y_plot, out_unit = self.__convert_aq_units_to_log_friendly(yi, rows=x) unit_type, unit = self.__get_unit_info(out_unit) else: y_plot = y_vals else: y_plot = y_vals if i == 0: subheader_previous = subheader unit_type_previous = unit_type if unit_type != unit_type_previous and i != 0: msg = ("{} has a different unit of measurement ".format(yi)+"" "({}) than {} ({}). ".format(unit, yi_previous, unit_type_previous)+"" "Plotted variables must share units.") self.err_handler.raise_exception(msg) elif "activity" in subheader.lower() and "molality" in subheader_previous.lower(): msg = ("{} has a different unit of measurement ".format(yi)+"" "({}) than {} ({}). ".format("activity", yi_previous, "molality")+"" "Plotted variables must share units.") self.err_handler.raise_exception(msg) elif "molality" in subheader.lower() and "activity" in subheader_previous.lower(): msg = ("{} has a different unit of measurement ".format(yi)+"" "({}) than {} ({}). ".format("molality", yi_previous, "activity")+"" "Plotted variables must share units.") self.err_handler.raise_exception(msg) yi_previous = copy.deepcopy(yi) unit_type_previous = copy.deepcopy(unit_type) subheader_previous = copy.deepcopy(subheader) df.loc[:, yi] = y_plot if len(y) > 1: if unit != "": ylabel = "{} [{}]".format(unit_type, unit) else: ylabel = unit_type else: if 'pH' in y: ylabel = 'pH' elif 'Temperature' in y: ylabel = 'Temperature [°C]' else: if unit != "": ylabel = "{} {} [{}]".format(html_chemname_format_AqEquil(y[0]), unit_type, unit) else: ylabel = "{} {}".format(html_chemname_format_AqEquil(y[0]), unit_type) df = pd.melt(df, id_vars=["name"], value_vars=y) df = df.rename(columns={"Sample": "y_variable", "value": "y_value"}) df['y_variable'] = df['y_variable'].apply(html_chemname_format_AqEquil) if unit_type == "energy supply" or unit_type == "affinity": # get formatted reactions to display if not isinstance(self.reactions_for_plotting, pd.DataFrame): self.reactions_for_plotting = self.show_redox_reactions(formatted=True, charge_sign_at_end=False, show=False, simplify=True) y_find = [yi.replace("_energy", "").replace("_affinity", "") for yi in y] rxns = self.reactions_for_plotting.loc[y_find, :]["reaction"].tolist()*len(x) if len(y) == 1: ylabel = "{}<br>{} [{}]".format(html_chemname_format_AqEquil(y_find[0]), unit_type, unit) # customdata for displaying reactions has to be here instead of in update_traces fig = px.bar(df, x="name", y="y_value", height=plot_height*ppi, width=plot_width*ppi, color='y_variable', barmode='group', labels={'y_value': ylabel}, template="simple_white", color_discrete_map=dict_species_color, custom_data=[rxns]) fig.update_traces(hovertemplate = "%{x} <br>"+ylabel+": %{y}<br>%{customdata[0]}") else: fig = px.bar(df, x="name", y="y_value", height=plot_height*ppi, width=plot_width*ppi, color='y_variable', barmode='group', labels={'y_value': ylabel}, template="simple_white", color_discrete_map=dict_species_color) fig.update_traces(hovertemplate = "%{x} <br>"+ylabel+": %{y}") fig.update_layout(xaxis_tickangle=-45, xaxis_title=None, title={'text':title, 'x':0.5, 'xanchor':'center'}, legend_title=None, margin={"t": 40}, xaxis={'fixedrange':True}, yaxis={'fixedrange':True, 'exponentformat':'power'}) if len(y) == 1: fig.update_layout(showlegend=False) save_as, save_format = self.__save_figure(fig, save_as, save_format, save_scale, plot_width, plot_height, ppi) config = {'displaylogo': False, 'modeBarButtonsToRemove': ['zoom2d', 'pan2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d', 'toggleSpikelines'], 'toImageButtonOptions': { 'format': save_format, 'filename': save_as, 'height': plot_height*ppi, 'width': plot_width*ppi, 'scale': save_scale, }, } if not interactive: config['staticPlot'] = True fig.show(config=config) def scatterplot(self, x="pH", y="Temperature", title=None, plot_width=4, plot_height=3, ppi=122, fill_alpha=0.7, point_size=10, colormap="WORM", save_as=None, save_format=None, save_scale=1, interactive=True): """ Vizualize two or more sample variables with a scatterplot. Parameters ---------- x, y : str, default for x is "pH", default for y is "Temperature" Names of the variables to plot against each other. Valid variables are columns in the speciation report. `y` can be a list of of variable names for a multi-series scatterplot. title : str, optional Title of the plot. plot_width, plot_height : numeric, default 4 by 3 Width and height of the plot, in inches. Size of interactive plots is also determined by pixels per inch, set by the parameter `ppi`. ppi : numeric, default 122 Pixels per inch. Along with `plot_width` and `plot_height`, determines the size of interactive plots. fill_alpha : numeric, default 0.7 Transparency of scatterpoint area fill. point_size : numeric, default 10 Size of scatterpoints. colormap : str, default "WORM" Name of the colormap to color the scatterpoints. Accepts "WORM", "colorblind", or matplotlib colormaps. See https://matplotlib.org/stable/tutorials/colors/colormaps.html The "colorblind" colormap is referenced from Wong, B. Points of view: Color blindness. Nat Methods 8, 441 (2011). https://doi.org/10.1038/nmeth.1618 save_as : str, optional Provide a filename to save this figure. Filetype of saved figure is determined by `save_format`. Note: interactive plots can be saved by clicking the 'Download plot' button in the plot's toolbar. save_format : str, default "png" Desired format of saved or downloaded figure. Can be 'png', 'jpg', 'jpeg', 'webp', 'svg', 'pdf', 'eps', 'json', or 'html'. If 'html', an interactive plot will be saved. Only 'png', 'svg', 'jpeg', and 'webp' can be downloaded with the 'download as' button in the toolbar of an interactive plot. save_scale : numeric, default 1 Multiply title/legend/axis/canvas sizes by this factor when saving the figure. interactive : bool, default True Return an interactive plot if True or a static plot if False. """ if not isinstance(y, list): y = [y] if not isinstance(x, str): self.err_handler.raise_exception("x must be a string.") x_col = self.lookup(x) try: xsubheader = x_col.columns.get_level_values(1)[0] except: msg = ("Could not find '{}' ".format(x)+"in the speciation " "report. Available variables include " "{}".format(list(set(self.report.columns.get_level_values(0))))) self.err_handler.raise_exception(msg) try: x_plot = [float(x0[0]) if x0[0] != 'NA' else float("nan") for x0 in x_col.values.tolist()] except: msg = ("One or more the values belonging to " "'{}' are non-numeric and cannot be plotted.".format(x_col.columns.get_level_values(0)[0])) self.err_handler.raise_exception(msg) try: xunit_type, xunit = self.__get_unit_info(xsubheader) except: xunit_type = "" xunit = "" colors = get_colors(colormap, len(y), alpha=fill_alpha) for i, yi in enumerate(y): y_col = self.lookup(yi) try: subheader = y_col.columns.get_level_values(1)[0] except: msg = ("Could not find '{}' ".format(yi)+"in the speciation " "report. Available variables include " "{}".format(list(set(self.report.columns.get_level_values(0))))) self.err_handler.raise_exception(msg) try: unit_type, unit = self.__get_unit_info(subheader) except: unit_type = "" unit = "" try: y_plot = [float(y0[0]) if y0[0] != 'NA' else float("nan") for y0 in y_col.values.tolist()] except: msg = ("One or more the values belonging to " "'{}' are non-numeric and cannot be plotted.".format(y_col.columns.get_level_values(0)[0])) self.err_handler.raise_exception(msg) if i == 0: subheader_previous = subheader unit_type_previous = unit_type if unit_type != unit_type_previous and i != 0: msg = ("{} has a different unit of measurement ".format(yi)+"" "({}) than {} ({}). ".format(unit_type, yi_previous, unit_type_previous)+"" "Plotted variables must share units.") self.err_handler.raise_exception(msg) elif "activity" in subheader.lower() and "molality" in subheader_previous.lower(): msg = ("{} has a different unit of measurement ".format(yi)+"" "({}) than {} ({}). ".format("activity", yi_previous, "molality")+"" "Plotted variables must share units.") self.err_handler.raise_exception(msg) elif "molality" in subheader.lower() and "activity" in subheader_previous.lower(): msg = ("{} has a different unit of measurement ".format(yi)+"" "({}) than {} ({}). ".format("molality", yi_previous, "activity")+"" "Plotted variables must share units.") self.err_handler.raise_exception(msg) yi_previous = copy.deepcopy(yi) unit_type_previous = copy.deepcopy(unit_type) subheader_previous = copy.deepcopy(subheader) if len(y) > 1: if unit != "": ylabel = "{} [{}]".format(unit_type, unit) else: ylabel = unit_type else: if 'pH' in y: ylabel = 'pH' elif 'Temperature' in y: ylabel = 'Temperature [°C]' else: y_formatted = html_chemname_format_AqEquil(y[0]) if unit != "": ylabel = "{} {} [{}]".format(y_formatted, unit_type, unit) else: ylabel = "{} {}".format(y_formatted, unit_type) if x == 'pH': xlabel = 'pH' elif x == 'Temperature': xlabel = 'Temperature [°C]' else: x_formatted = html_chemname_format_AqEquil(x) if xunit != "": xlabel = "{} {} [{}]".format(x_formatted, xunit_type, xunit) else: xlabel = "{} {}".format(x_formatted, xunit_type) # convert rgba to hex colors = [matplotlib.colors.rgb2hex(c) for c in colors] # map each species to its color, e.g., # {'CO2': '#000000', 'HCO3-': '#1699d3', 'Other': '#736ca8'} dict_species_color = {sp:color for sp,color in zip(y, colors)} # html format color dict key names dict_species_color = {html_chemname_format_AqEquil(k):v for k,v in dict_species_color.items()} df = self.lookup(["name", x]+y).copy() df.loc[:, "name"] = df.index df.columns = df.columns.get_level_values(0) df = pd.melt(df, id_vars=["name", x], value_vars=y) df = df.rename(columns={"Sample": "y_variable", "value": "y_value"}) if (unit_type == "energy supply" or unit_type == "affinity") and self.reactions_for_plotting!=None: # get formatted reactions to display if not isinstance(self.reactions_for_plotting, pd.DataFrame): self.reactions_for_plotting = self.show_redox_reactions(formatted=True, charge_sign_at_end=False, show=False, simplify=True) y_find = [yi.replace("_energy", "").replace("_affinity", "") for yi in y] rxns = self.reactions_for_plotting.loc[y_find, :]["reaction"].tolist() rxn_dict = {rxn_name:rxn for rxn_name,rxn in zip(y, rxns)} if len(y) == 1: ylabel = "{}<br>{} [{}]".format(html_chemname_format_AqEquil(y_find[0]), unit_type, unit) df["formatted_rxn"] = df["y_variable"].map(rxn_dict) else: df["formatted_rxn"] = "" df['y_variable'] = df['y_variable'].apply(html_chemname_format_AqEquil) fig = px.scatter(df, x=x, y="y_value", color="y_variable", hover_data=[x, "y_value", "y_variable", "name", "formatted_rxn"], width=plot_width*ppi, height=plot_height*ppi, labels={x: xlabel, "y_value": ylabel}, category_orders={"species": y}, color_discrete_map=dict_species_color, opacity=fill_alpha, custom_data=['name', 'formatted_rxn'], template="simple_white") fig.update_traces(marker=dict(size=point_size), hovertemplate = "%{customdata[0]}<br>"+xlabel+": %{x} <br>"+ylabel+": %{y}<br>%{customdata[1]}") fig.update_layout(legend_title=None, title={'text':title, 'x':0.5, 'xanchor':'center'}, margin={"t": 40}, yaxis={'exponentformat':'power'}) if len(y) == 1: fig.update_layout(showlegend=False) save_as, save_format = self.__save_figure(fig, save_as, save_format, save_scale, plot_width, plot_height, ppi) config = {'displaylogo': False, 'scrollZoom': True, 'modeBarButtonsToRemove': ['select2d', 'lasso2d', 'toggleSpikelines', 'resetScale2d'], 'toImageButtonOptions': { 'format': save_format, # one of png, svg, jpeg, webp 'filename': save_as, 'height': plot_height*ppi, 'width': plot_width*ppi, 'scale': save_scale, }, } if not interactive: config['staticPlot'] = True fig.show(config=config) def plot_mass_contribution(self, basis, title=None, sort_by=None, ascending=True, sort_y_by=None, width=0.9, colormap="WORM", plot_width=4, plot_height=3, ppi=122, save_as=None, save_format=None, save_scale=1, interactive=True): """ Plot basis species contributions to mass balance of aqueous species across all samples. Parameters ---------- basis : str Name of the basis species. title : str, optional Title of the plot. sort_by : str, optional Name of the variable used to sort samples. Variable names must be taken from the speciation report column names. No sorting is done by default. ascending : bool, default True Should sample sorting be in ascending order? Descending if False. Ignored unless `sort_by` is defined. sort_y_by : list of str or 'alphabetical', optional List of species names in the order that they should be stacked, from the bottom of the plot to the top. 'alphabetical' will sort species alphabetically. width : float, default 0.9 Width of bars. No space between bars if width=1.0. colormap : str, default "WORM" Name of the colormap to color the scatterpoints. Accepts "WORM", "colorblind", or matplotlib colormaps. See https://matplotlib.org/stable/tutorials/colors/colormaps.html The "colorblind" colormap is referenced from Wong, B. Points of view: Color blindness. Nat Methods 8, 441 (2011). https://doi.org/10.1038/nmeth.1618 plot_width, plot_height : numeric, default 4 by 3 Width and height of the plot, in inches. Size of interactive plots is also determined by pixels per inch, set by the parameter `ppi`. ppi : numeric, default 122 Pixels per inch. Along with `plot_width` and `plot_height`, determines the size of interactive plots. save_as : str, optional Provide a filename to save this figure. Filetype of saved figure is determined by `save_format`. Note: interactive plots can be saved by clicking the 'Download plot' button in the plot's toolbar. save_format : str, default "png" Desired format of saved or downloaded figure. Can be 'png', 'jpg', 'jpeg', 'webp', 'svg', 'pdf', 'eps', 'json', or 'html'. If 'html', an interactive plot will be saved. Only 'png', 'svg', 'jpeg', and 'webp' can be downloaded with the 'download as' button in the toolbar of an interactive plot. save_scale : numeric, default 1 Multiply title/legend/axis/canvas sizes by this factor when saving the figure. interactive : bool, default True Return an interactive plot if True or a static plot if False. """ try: self.mass_contribution except: msg = ("Results for basis species contributions to aqueous mass " "balance could not be found. Ensure that " "get_mass_contribution = True when running speciate().") self.err_handler.raise_exception(msg) if basis not in set(self.mass_contribution['basis']): msg = ("The basis species {} ".format(basis)+"could not be found " "among available basis species: " "{}".format(str(list(set(self.mass_contribution['basis']))))) self.err_handler.raise_exception(msg) df_sp = copy.deepcopy(self.mass_contribution.loc[self.mass_contribution['basis'] == basis]) if sort_by != None: if sort_by in self.report.columns.get_level_values(0): sort_col = self.lookup(sort_by) sort_by_unit = sort_col.columns.get_level_values(1)[0] sort_index = sort_col.sort_values([(sort_by, sort_by_unit)], ascending=ascending).index df_list = [] for i in sort_index: df_list.append(df_sp[df_sp['sample']==i]) df_sp = pd.concat(df_list) else: msg = ("Could not find {}".format(sort_by)+" in the " "speciation report. Available variables include " "{}".format(list(self.report.columns.get_level_values(0)))) self.err_handler.raise_exception(msg) df_sp['percent'] = df_sp['percent'].astype(float) unique_species = self.__unique(df_sp["species"]) if "Other" in unique_species: unique_species.append(unique_species.pop(unique_species.index("Other"))) labels = self.__unique(df_sp["sample"]) bottom = np.array([0]*len(labels)) if sort_y_by != None: if isinstance(sort_y_by, list): if len(unique_species) == len(sort_y_by): if len([s for s in unique_species if s in sort_y_by]) == len(unique_species) and len([s for s in sort_y_by if s in unique_species]) == len(unique_species): unique_species = sort_y_by else: valid_needed = [s for s in unique_species if s not in sort_y_by] invalid = [s for s in sort_y_by if s not in unique_species] msg = ("sort_y_by is missing the following species: " "{}".format(valid_needed)+" and was provided " "these invalid species: {}".format(invalid)) self.err_handler.raise_exception(msg) elif len(sort_y_by) < len(unique_species): msg = ("sort_y_by must have of all of the " "following species: {}".format(unique_species)+". " "You are missing {}".format([s for s in unique_species if s not in sort_y_by])) self.err_handler.raise_exception(msg) else: msg = ("sort_y_by can only have the " "following species: {}".format(unique_species)+".") self.err_handler.raise_exception(msg) elif sort_y_by == "alphabetical": if "Other" in unique_species: unique_species_no_other = [sp for sp in unique_species if sp != "Other"] unique_species_no_other = sorted(unique_species_no_other) unique_species = unique_species_no_other + ["Other"] else: unique_species = sorted(unique_species) else: self.err_handler.raise_exception("sort_y_by must be either None, 'alphabetical', " "or a list of species names.") # get colormap colors = get_colors(colormap, len(unique_species)) # convert rgba to hex colors = [matplotlib.colors.rgb2hex(c) for c in colors] df_sp["species"] = df_sp["species"].apply(html_chemname_format_AqEquil) unique_species = [html_chemname_format_AqEquil(sp) for sp in unique_species] # map each species to its color, e.g., # {'CO2': '#000000', 'HCO3-': '#1699d3', 'Other': '#736ca8'} dict_species_color = {sp:color for sp,color in zip(unique_species, colors)} if title == None: title = '<span style="font-size: 14px;">Species accounting for mass balance of {}</span>'.format(html_chemname_format_AqEquil(basis)) fig = px.bar(df_sp, x="sample", y="percent", color="species", width=plot_width*ppi, height=plot_height*ppi, labels={"sample": "sample", "percent": "mole %", "species": "species"}, category_orders={"species": unique_species, "sample": labels}, color_discrete_map=dict_species_color, template="simple_white", ) fig.update_layout(xaxis_tickangle=-45, xaxis_title=None, legend_title=None, title={'text':title, 'x':0.5, 'xanchor':'center'}, margin={"t": 40}, bargap=0, xaxis={'fixedrange':True}, yaxis={'fixedrange':True}) fig.update_traces(width=width, marker_line_width=0) save_as, save_format = self.__save_figure(fig, save_as, save_format, save_scale, plot_width, plot_height, ppi) config = {'displaylogo': False, 'modeBarButtonsToRemove': ['zoom2d', 'pan2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d', 'toggleSpikelines'], 'toImageButtonOptions': { 'format': save_format, # one of png, svg, jpeg, webp 'filename': save_as, 'height': plot_height*ppi, 'width': plot_width*ppi, 'scale': save_scale, }, } if not interactive: config['staticPlot'] = True fig.show(config=config) def plot_solid_solutions(self, sample, title=None, width=0.9, colormap="WORM", affinity_plot=True, affinity_plot_colors=["blue", "orange"], plot_width=4, plot_height=4, ppi=122, save_as=None, save_format=None, save_scale=1, interactive=True): """ Plot fractions of minerals of hypothetical solid solutions in a sample. Parameters ---------- sample : str Name of the sample. title : str, optional Title of the plot. width : float, default 0.9 Width of bars. No space between bars if width=1.0. colormap : str, default "WORM" Name of the colormap to color the scatterpoints. Accepts "WORM", "colorblind", or matplotlib colormaps. See https://matplotlib.org/stable/tutorials/colors/colormaps.html The "colorblind" colormap is referenced from Wong, B. Points of view: Color blindness. Nat Methods 8, 441 (2011). https://doi.org/10.1038/nmeth.1618 affinity_plot : bool, default True Include the affinity subplot? affinity_plot_colors : list of two str, default ["blue", "orange"] Colors indicating positive and negative values in the affinity subplot, respectively. plot_width, plot_height : numeric, default 4 by 3 Width and height of the plot, in inches. Size of interactive plots is also determined by pixels per inch, set by the parameter `ppi`. ppi : numeric, default 122 Pixels per inch. Along with `plot_width` and `plot_height`, determines the size of interactive plots. save_as : str, optional Provide a filename to save this figure. Filetype of saved figure is determined by `save_format`. Note: interactive plots can be saved by clicking the 'Download plot' button in the plot's toolbar. save_format : str, default "png" Desired format of saved or downloaded figure. Can be 'png', 'jpg', 'jpeg', 'webp', 'svg', 'pdf', 'eps', 'json', or 'html'. If 'html', an interactive plot will be saved. Only 'png', 'svg', 'jpeg', and 'webp' can be downloaded with the 'download as' button in the toolbar of an interactive plot. save_scale : numeric, default 1 Multiply title/legend/axis/canvas sizes by this factor when saving the figure. interactive : bool, default True Return an interactive plot if True or a static plot if False. """ if sample not in self.sample_data.keys(): msg = ("The sample "+sample+" was not found in this speciation dataset." " Samples with solid solutions in this dataset include:"+str([s for s in self.sample_data.keys() if "solid_solutions" in self.sample_data[s].keys()])) self.err_handler.raise_exception(msg) try: self.sample_data[sample]["solid_solutions"] except: msg = ("Results for solid solutions could not be found for this " "sample. Samples with solid solutions in this speciation " "dataset include:"+str([s for s in self.sample_data.keys() if "solid_solutions" in self.sample_data[s].keys()])) self.err_handler.raise_exception(msg) if title == None: title = "Hypothetical solid solutions in " + sample df_full = copy.deepcopy(self.sample_data[sample]["solid_solutions"]) df = copy.deepcopy(df_full.dropna(subset=['x'])) df = df[df['x'] != 0] unique_minerals = self.__unique(df["mineral"]) # get colormap colors = get_colors(colormap, len(unique_minerals)) # convert rgba to hex colors = [matplotlib.colors.rgb2hex(c) for c in colors] # map each species to its color, e.g., # {'CO2': '#000000', 'HCO3-': '#1699d3', 'Other': '#736ca8'} dict_minerals_color = {sp:color for sp,color in zip(unique_minerals, colors)} solid_solutions = list(dict.fromkeys(df["solid solution"])) df_ss_only = df_full[df_full["x"].isnull()] mineral_dict = {m:[] for m in unique_minerals} for ss in solid_solutions: for m in unique_minerals: df_sub = df.loc[df["solid solution"] == ss,] frac = df_sub.loc[df_sub["mineral"] == m, "x"] if len(frac) > 0: mineral_dict[m] = mineral_dict[m] + list(frac) else: mineral_dict[m].append(0) if affinity_plot: rows = 2 specs = [[{"type": "bar"}], [{"type": "bar"}]] else: rows = 1 specs = [[{"type": "bar"}]] fig = make_subplots( rows=rows, cols=1, specs=specs, vertical_spacing = 0.05 ) # subplot 1 for m in unique_minerals[::-1]: fig.add_trace(go.Bar(name=m, x=solid_solutions, y=mineral_dict[m], marker_color=dict_minerals_color[m]), row=1, col=1) # subplot 2 if affinity_plot: fig.add_trace(go.Bar(name="ss", x=solid_solutions, y=df_ss_only["Aff, kcal"], marker_color=[affinity_plot_colors[0] if val > 0 else affinity_plot_colors[1] for val in df_ss_only["Aff, kcal"]], showlegend=False), row=2, col=1) fig.update_layout(barmode='stack', xaxis_tickangle=-45, xaxis_title=None, legend_title=None, title={'text':title, 'x':0.5, 'xanchor':'center'}, autosize=False, width=plot_width*ppi, height=plot_height*ppi, margin={"t": 40}, bargap=0, xaxis={'fixedrange':True}, yaxis={'fixedrange':True}, template="simple_white") fig.update_xaxes(tickangle=-45) fig['layout']['yaxis']['title']='Mole Fraction' if affinity_plot: fig['layout']['yaxis2']['title']='Affinity, kcal/mol' fig.update_xaxes(showticklabels=False) # hide all the xticks fig.update_xaxes(showticklabels=True, row=2, col=1) save_as, save_format = self.__save_figure(fig, save_as, save_format, save_scale, plot_width, plot_height, ppi) config = {'displaylogo': False, 'modeBarButtonsToRemove': ['zoom2d', 'pan2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d', 'toggleSpikelines'], 'toImageButtonOptions': { 'format': save_format, # one of png, svg, jpeg, webp 'filename': save_as, 'height': plot_height*ppi, 'width': plot_width*ppi, 'scale': save_scale, }, } if not interactive: config['staticPlot'] = True fig.show(config=config)Methods
def barplot(self, y, title=None, convert_log=True, show_missing=True, plot_width=4, plot_height=3, ppi=122, colormap='WORM', save_as=None, save_format=None, save_scale=1, interactive=True)-
Show a bar plot to vizualize one or more variables across all samples.
Parameters
y:strorlistofstr- Name (or list of names) of the variables to plot. Valid variables are columns in the speciation report.
title:str, optional- Title of the plot.
convert_log:bool, defaultTrue- Convert units "log_activity", "log_molality", "log_gamma", and "log_fugacity" to "activity", "molality", "gamma", and "fugacity", respectively?
show_missing:bool, defaultTrue- Show samples that do not have bars?
plot_width,plot_height:numeric, default4 by 3- Width and height of the plot, in inches.
ppi:numeric, default122- Pixels per inch. Along with
plot_widthandplot_height, determines the size of interactive plots. colormap:str, default"WORM"- Name of the colormap to color the scatterpoints. Accepts "WORM", "colorblind", or matplotlib colormaps. See https://matplotlib.org/stable/tutorials/colors/colormaps.html The "colorblind" colormap is referenced from Wong, B. Points of view: Color blindness. Nat Methods 8, 441 (2011). https://doi.org/10.1038/nmeth.1618
save_as:str, optional- Provide a filename to save this figure. Filetype of saved figure is
determined by
save_format. Note: interactive plots can be saved by clicking the 'Download plot' button in the plot's toolbar. save_format:str, default"png"- Desired format of saved or downloaded figure. Can be 'png', 'jpg', 'jpeg', 'webp', 'svg', 'pdf', 'eps', 'json', or 'html'. If 'html', an interactive plot will be saved. Only 'png', 'svg', 'jpeg', and 'webp' can be downloaded with the 'download as' button in the toolbar of an interactive plot.
save_scale:numeric, default1- Multiply title/legend/axis/canvas sizes by this factor when saving the figure.
interactive:bool, defaultTrue- Return an interactive plot if True or a static plot if False.
Expand source code
def barplot(self, y, title=None, convert_log=True, show_missing=True, plot_width=4, plot_height=3, ppi=122, colormap="WORM", save_as=None, save_format=None, save_scale=1, interactive=True): """ Show a bar plot to vizualize one or more variables across all samples. Parameters ---------- y : str or list of str Name (or list of names) of the variables to plot. Valid variables are columns in the speciation report. title : str, optional Title of the plot. convert_log : bool, default True Convert units "log_activity", "log_molality", "log_gamma", and "log_fugacity" to "activity", "molality", "gamma", and "fugacity", respectively? show_missing : bool, default True Show samples that do not have bars? plot_width, plot_height : numeric, default 4 by 3 Width and height of the plot, in inches. ppi : numeric, default 122 Pixels per inch. Along with `plot_width` and `plot_height`, determines the size of interactive plots. colormap : str, default "WORM" Name of the colormap to color the scatterpoints. Accepts "WORM", "colorblind", or matplotlib colormaps. See https://matplotlib.org/stable/tutorials/colors/colormaps.html The "colorblind" colormap is referenced from Wong, B. Points of view: Color blindness. Nat Methods 8, 441 (2011). https://doi.org/10.1038/nmeth.1618 save_as : str, optional Provide a filename to save this figure. Filetype of saved figure is determined by `save_format`. Note: interactive plots can be saved by clicking the 'Download plot' button in the plot's toolbar. save_format : str, default "png" Desired format of saved or downloaded figure. Can be 'png', 'jpg', 'jpeg', 'webp', 'svg', 'pdf', 'eps', 'json', or 'html'. If 'html', an interactive plot will be saved. Only 'png', 'svg', 'jpeg', and 'webp' can be downloaded with the 'download as' button in the toolbar of an interactive plot. save_scale : numeric, default 1 Multiply title/legend/axis/canvas sizes by this factor when saving the figure. interactive : bool, default True Return an interactive plot if True or a static plot if False. """ if not isinstance(y, list): y = [y] colors = get_colors(colormap, len(y)) # convert rgba to hex colors = [matplotlib.colors.rgb2hex(c) for c in colors] # map each species to its color, e.g., # {'CO2': '#000000', 'HCO3-': '#1699d3', 'Other': '#736ca8'} dict_species_color = {sp:color for sp,color in zip(y, colors)} # html format color dict key names dict_species_color = {html_chemname_format_AqEquil(k):v for k,v in dict_species_color.items()} y_cols = self.lookup(y) if not show_missing: y_cols = y_cols.dropna(how='all') # this df will keep subheaders x = y_cols.index # names of samples df = self.lookup(["name"]+y).copy() if not show_missing: df = df.dropna(how='all') # this df will lose subheaders (flattened) df.loc[:, "name"] = df.index df.columns = df.columns.get_level_values(0) for i, yi in enumerate(y): y_col = y_cols.iloc[:, y_cols.columns.get_level_values(0)==yi] try: subheader = y_col.columns.get_level_values(1)[0] except: msg = ("Could not find '{}' ".format(yi)+"in the speciation " "report. Available variables include " "{}".format(list(set(self.report.columns.get_level_values(0))))) self.err_handler.raise_exception(msg) try: unit_type, unit = self.__get_unit_info(subheader) except: unit_type = "" unit = "" try: y_vals = [float(y0[0]) if y0[0] != 'NA' else float("nan") for y0 in y_col.values.tolist()] except: msg = ("One or more the values belonging to " "'{}' are non-numeric and cannot be plotted.".format(y_col.columns.get_level_values(0)[0])) self.err_handler.raise_exception(msg) if convert_log and [abs(y0) for y0 in y_vals] != y_vals: # convert to bar-friendly units if possible if subheader in ["log_activity", "log_molality", "log_gamma", "log_fugacity"]: y_plot, out_unit = self.__convert_aq_units_to_log_friendly(yi, rows=x) unit_type, unit = self.__get_unit_info(out_unit) else: y_plot = y_vals else: y_plot = y_vals if i == 0: subheader_previous = subheader unit_type_previous = unit_type if unit_type != unit_type_previous and i != 0: msg = ("{} has a different unit of measurement ".format(yi)+"" "({}) than {} ({}). ".format(unit, yi_previous, unit_type_previous)+"" "Plotted variables must share units.") self.err_handler.raise_exception(msg) elif "activity" in subheader.lower() and "molality" in subheader_previous.lower(): msg = ("{} has a different unit of measurement ".format(yi)+"" "({}) than {} ({}). ".format("activity", yi_previous, "molality")+"" "Plotted variables must share units.") self.err_handler.raise_exception(msg) elif "molality" in subheader.lower() and "activity" in subheader_previous.lower(): msg = ("{} has a different unit of measurement ".format(yi)+"" "({}) than {} ({}). ".format("molality", yi_previous, "activity")+"" "Plotted variables must share units.") self.err_handler.raise_exception(msg) yi_previous = copy.deepcopy(yi) unit_type_previous = copy.deepcopy(unit_type) subheader_previous = copy.deepcopy(subheader) df.loc[:, yi] = y_plot if len(y) > 1: if unit != "": ylabel = "{} [{}]".format(unit_type, unit) else: ylabel = unit_type else: if 'pH' in y: ylabel = 'pH' elif 'Temperature' in y: ylabel = 'Temperature [°C]' else: if unit != "": ylabel = "{} {} [{}]".format(html_chemname_format_AqEquil(y[0]), unit_type, unit) else: ylabel = "{} {}".format(html_chemname_format_AqEquil(y[0]), unit_type) df = pd.melt(df, id_vars=["name"], value_vars=y) df = df.rename(columns={"Sample": "y_variable", "value": "y_value"}) df['y_variable'] = df['y_variable'].apply(html_chemname_format_AqEquil) if unit_type == "energy supply" or unit_type == "affinity": # get formatted reactions to display if not isinstance(self.reactions_for_plotting, pd.DataFrame): self.reactions_for_plotting = self.show_redox_reactions(formatted=True, charge_sign_at_end=False, show=False, simplify=True) y_find = [yi.replace("_energy", "").replace("_affinity", "") for yi in y] rxns = self.reactions_for_plotting.loc[y_find, :]["reaction"].tolist()*len(x) if len(y) == 1: ylabel = "{}<br>{} [{}]".format(html_chemname_format_AqEquil(y_find[0]), unit_type, unit) # customdata for displaying reactions has to be here instead of in update_traces fig = px.bar(df, x="name", y="y_value", height=plot_height*ppi, width=plot_width*ppi, color='y_variable', barmode='group', labels={'y_value': ylabel}, template="simple_white", color_discrete_map=dict_species_color, custom_data=[rxns]) fig.update_traces(hovertemplate = "%{x} <br>"+ylabel+": %{y}<br>%{customdata[0]}") else: fig = px.bar(df, x="name", y="y_value", height=plot_height*ppi, width=plot_width*ppi, color='y_variable', barmode='group', labels={'y_value': ylabel}, template="simple_white", color_discrete_map=dict_species_color) fig.update_traces(hovertemplate = "%{x} <br>"+ylabel+": %{y}") fig.update_layout(xaxis_tickangle=-45, xaxis_title=None, title={'text':title, 'x':0.5, 'xanchor':'center'}, legend_title=None, margin={"t": 40}, xaxis={'fixedrange':True}, yaxis={'fixedrange':True, 'exponentformat':'power'}) if len(y) == 1: fig.update_layout(showlegend=False) save_as, save_format = self.__save_figure(fig, save_as, save_format, save_scale, plot_width, plot_height, ppi) config = {'displaylogo': False, 'modeBarButtonsToRemove': ['zoom2d', 'pan2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d', 'toggleSpikelines'], 'toImageButtonOptions': { 'format': save_format, 'filename': save_as, 'height': plot_height*ppi, 'width': plot_width*ppi, 'scale': save_scale, }, } if not interactive: config['staticPlot'] = True fig.show(config=config) def lookup(self, col=None)-
Look up desired columns in the speciation report.
Parameters
col:strorlistofstr- Leave blank to get a list of section names in the report:
speciation.lookup()Provide the name of a section to look up the names of columns in that section of the report:speciation.lookup("aq_distribution")Provide a column name (or a list of column names) to retrieve the column from the report:speciation.lookup(["Temperature", "O2"])
Returns
Pandas dataframeorlistofstr- If a column name (or list of column names) is provided, returns the speciation report with only the desired column(s). Otherwise returns a list of section names (if no arguments are provided), or a list of columns in a section (if a section name is provided).
Expand source code
def lookup(self, col=None): """ Look up desired columns in the speciation report. Parameters ---------- col : str or list of str Leave blank to get a list of section names in the report: ```speciation.lookup()``` Provide the name of a section to look up the names of columns in that section of the report: ```speciation.lookup("aq_distribution")``` Provide a column name (or a list of column names) to retrieve the column from the report: ```speciation.lookup(["Temperature", "O2"])``` Returns ---------- Pandas dataframe or list of str If a column name (or list of column names) is provided, returns the speciation report with only the desired column(s). Otherwise returns a list of section names (if no arguments are provided), or a list of columns in a section (if a section name is provided). """ names_length = len(self.report_divs.names) if col==None and names_length>0: return list(self.report_divs.names) if names_length>0: if col in list(self.report_divs.names): return list(self.report_divs.rx2(col)) if isinstance(col, str): col = [col] return self.report.iloc[:, self.report.columns.get_level_values(0).isin(set(col))] def plot_mass_contribution(self, basis, title=None, sort_by=None, ascending=True, sort_y_by=None, width=0.9, colormap='WORM', plot_width=4, plot_height=3, ppi=122, save_as=None, save_format=None, save_scale=1, interactive=True)-
Plot basis species contributions to mass balance of aqueous species across all samples.
Parameters
basis:str- Name of the basis species.
title:str, optional- Title of the plot.
sort_by:str, optional- Name of the variable used to sort samples. Variable names must be taken from the speciation report column names. No sorting is done by default.
ascending:bool, defaultTrue- Should sample sorting be in ascending order? Descending if False.
Ignored unless
sort_byis defined. sort_y_by:listofstror'alphabetical', optional- List of species names in the order that they should be stacked, from the bottom of the plot to the top. 'alphabetical' will sort species alphabetically.
width:float, default0.9- Width of bars. No space between bars if width=1.0.
colormap:str, default"WORM"- Name of the colormap to color the scatterpoints. Accepts "WORM", "colorblind", or matplotlib colormaps. See https://matplotlib.org/stable/tutorials/colors/colormaps.html The "colorblind" colormap is referenced from Wong, B. Points of view: Color blindness. Nat Methods 8, 441 (2011). https://doi.org/10.1038/nmeth.1618
plot_width,plot_height:numeric, default4 by 3- Width and height of the plot, in inches. Size of interactive plots
is also determined by pixels per inch, set by the parameter
ppi. ppi:numeric, default122- Pixels per inch. Along with
plot_widthandplot_height, determines the size of interactive plots. save_as:str, optional- Provide a filename to save this figure. Filetype of saved figure is
determined by
save_format. Note: interactive plots can be saved by clicking the 'Download plot' button in the plot's toolbar. save_format:str, default"png"- Desired format of saved or downloaded figure. Can be 'png', 'jpg', 'jpeg', 'webp', 'svg', 'pdf', 'eps', 'json', or 'html'. If 'html', an interactive plot will be saved. Only 'png', 'svg', 'jpeg', and 'webp' can be downloaded with the 'download as' button in the toolbar of an interactive plot.
save_scale:numeric, default1- Multiply title/legend/axis/canvas sizes by this factor when saving the figure.
interactive:bool, defaultTrue- Return an interactive plot if True or a static plot if False.
Expand source code
def plot_mass_contribution(self, basis, title=None, sort_by=None, ascending=True, sort_y_by=None, width=0.9, colormap="WORM", plot_width=4, plot_height=3, ppi=122, save_as=None, save_format=None, save_scale=1, interactive=True): """ Plot basis species contributions to mass balance of aqueous species across all samples. Parameters ---------- basis : str Name of the basis species. title : str, optional Title of the plot. sort_by : str, optional Name of the variable used to sort samples. Variable names must be taken from the speciation report column names. No sorting is done by default. ascending : bool, default True Should sample sorting be in ascending order? Descending if False. Ignored unless `sort_by` is defined. sort_y_by : list of str or 'alphabetical', optional List of species names in the order that they should be stacked, from the bottom of the plot to the top. 'alphabetical' will sort species alphabetically. width : float, default 0.9 Width of bars. No space between bars if width=1.0. colormap : str, default "WORM" Name of the colormap to color the scatterpoints. Accepts "WORM", "colorblind", or matplotlib colormaps. See https://matplotlib.org/stable/tutorials/colors/colormaps.html The "colorblind" colormap is referenced from Wong, B. Points of view: Color blindness. Nat Methods 8, 441 (2011). https://doi.org/10.1038/nmeth.1618 plot_width, plot_height : numeric, default 4 by 3 Width and height of the plot, in inches. Size of interactive plots is also determined by pixels per inch, set by the parameter `ppi`. ppi : numeric, default 122 Pixels per inch. Along with `plot_width` and `plot_height`, determines the size of interactive plots. save_as : str, optional Provide a filename to save this figure. Filetype of saved figure is determined by `save_format`. Note: interactive plots can be saved by clicking the 'Download plot' button in the plot's toolbar. save_format : str, default "png" Desired format of saved or downloaded figure. Can be 'png', 'jpg', 'jpeg', 'webp', 'svg', 'pdf', 'eps', 'json', or 'html'. If 'html', an interactive plot will be saved. Only 'png', 'svg', 'jpeg', and 'webp' can be downloaded with the 'download as' button in the toolbar of an interactive plot. save_scale : numeric, default 1 Multiply title/legend/axis/canvas sizes by this factor when saving the figure. interactive : bool, default True Return an interactive plot if True or a static plot if False. """ try: self.mass_contribution except: msg = ("Results for basis species contributions to aqueous mass " "balance could not be found. Ensure that " "get_mass_contribution = True when running speciate().") self.err_handler.raise_exception(msg) if basis not in set(self.mass_contribution['basis']): msg = ("The basis species {} ".format(basis)+"could not be found " "among available basis species: " "{}".format(str(list(set(self.mass_contribution['basis']))))) self.err_handler.raise_exception(msg) df_sp = copy.deepcopy(self.mass_contribution.loc[self.mass_contribution['basis'] == basis]) if sort_by != None: if sort_by in self.report.columns.get_level_values(0): sort_col = self.lookup(sort_by) sort_by_unit = sort_col.columns.get_level_values(1)[0] sort_index = sort_col.sort_values([(sort_by, sort_by_unit)], ascending=ascending).index df_list = [] for i in sort_index: df_list.append(df_sp[df_sp['sample']==i]) df_sp = pd.concat(df_list) else: msg = ("Could not find {}".format(sort_by)+" in the " "speciation report. Available variables include " "{}".format(list(self.report.columns.get_level_values(0)))) self.err_handler.raise_exception(msg) df_sp['percent'] = df_sp['percent'].astype(float) unique_species = self.__unique(df_sp["species"]) if "Other" in unique_species: unique_species.append(unique_species.pop(unique_species.index("Other"))) labels = self.__unique(df_sp["sample"]) bottom = np.array([0]*len(labels)) if sort_y_by != None: if isinstance(sort_y_by, list): if len(unique_species) == len(sort_y_by): if len([s for s in unique_species if s in sort_y_by]) == len(unique_species) and len([s for s in sort_y_by if s in unique_species]) == len(unique_species): unique_species = sort_y_by else: valid_needed = [s for s in unique_species if s not in sort_y_by] invalid = [s for s in sort_y_by if s not in unique_species] msg = ("sort_y_by is missing the following species: " "{}".format(valid_needed)+" and was provided " "these invalid species: {}".format(invalid)) self.err_handler.raise_exception(msg) elif len(sort_y_by) < len(unique_species): msg = ("sort_y_by must have of all of the " "following species: {}".format(unique_species)+". " "You are missing {}".format([s for s in unique_species if s not in sort_y_by])) self.err_handler.raise_exception(msg) else: msg = ("sort_y_by can only have the " "following species: {}".format(unique_species)+".") self.err_handler.raise_exception(msg) elif sort_y_by == "alphabetical": if "Other" in unique_species: unique_species_no_other = [sp for sp in unique_species if sp != "Other"] unique_species_no_other = sorted(unique_species_no_other) unique_species = unique_species_no_other + ["Other"] else: unique_species = sorted(unique_species) else: self.err_handler.raise_exception("sort_y_by must be either None, 'alphabetical', " "or a list of species names.") # get colormap colors = get_colors(colormap, len(unique_species)) # convert rgba to hex colors = [matplotlib.colors.rgb2hex(c) for c in colors] df_sp["species"] = df_sp["species"].apply(html_chemname_format_AqEquil) unique_species = [html_chemname_format_AqEquil(sp) for sp in unique_species] # map each species to its color, e.g., # {'CO2': '#000000', 'HCO3-': '#1699d3', 'Other': '#736ca8'} dict_species_color = {sp:color for sp,color in zip(unique_species, colors)} if title == None: title = '<span style="font-size: 14px;">Species accounting for mass balance of {}</span>'.format(html_chemname_format_AqEquil(basis)) fig = px.bar(df_sp, x="sample", y="percent", color="species", width=plot_width*ppi, height=plot_height*ppi, labels={"sample": "sample", "percent": "mole %", "species": "species"}, category_orders={"species": unique_species, "sample": labels}, color_discrete_map=dict_species_color, template="simple_white", ) fig.update_layout(xaxis_tickangle=-45, xaxis_title=None, legend_title=None, title={'text':title, 'x':0.5, 'xanchor':'center'}, margin={"t": 40}, bargap=0, xaxis={'fixedrange':True}, yaxis={'fixedrange':True}) fig.update_traces(width=width, marker_line_width=0) save_as, save_format = self.__save_figure(fig, save_as, save_format, save_scale, plot_width, plot_height, ppi) config = {'displaylogo': False, 'modeBarButtonsToRemove': ['zoom2d', 'pan2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d', 'toggleSpikelines'], 'toImageButtonOptions': { 'format': save_format, # one of png, svg, jpeg, webp 'filename': save_as, 'height': plot_height*ppi, 'width': plot_width*ppi, 'scale': save_scale, }, } if not interactive: config['staticPlot'] = True fig.show(config=config) def plot_mineral_saturation(self, sample_name, title=None, mineral_sat_type='affinity', plot_width=4, plot_height=3, ppi=122, colors=['blue', 'orange'], save_as=None, save_format=None, save_scale=1, interactive=True, hide_traceback=True)-
Vizualize mineral saturation states in a sample as a bar plot.
Parameters
sample_name:str- Name of the sample to plot.
title:str, optional- Title of the plot.
mineral_sat_type:str, default"affinity"- Metric for mineral saturation state to plot. Can be "affinity" or "logQoverK".
colors:listoftwo str, default["blue", "orange"]- Sets the color of the bars representing supersaturated and undersaturated states, respectively.
save_as:str, optional- Provide a filename to save this figure. Filetype of saved figure is
determined by
save_format. Note: interactive plots can be saved by clicking the 'Download plot' button in the plot's toolbar. save_format:str, default"png"- Desired format of saved or downloaded figure. Can be 'png', 'jpg', 'jpeg', 'webp', 'svg', 'pdf', 'eps', 'json', or 'html'. If 'html', an interactive plot will be saved. Only 'png', 'svg', 'jpeg', and 'webp' can be downloaded with the 'download as' button in the toolbar of an interactive plot.
save_scale:numeric, default1- Multiply title/legend/axis/canvas sizes by this factor when saving the figure.
interactive:bool, defaultTrue- Return an interactive plot if True or a static plot if False.
Expand source code
def plot_mineral_saturation(self, sample_name, title=None, mineral_sat_type="affinity", plot_width=4, plot_height=3, ppi=122, colors=["blue", "orange"], save_as=None, save_format=None, save_scale=1, interactive=True, hide_traceback=True): """ Vizualize mineral saturation states in a sample as a bar plot. Parameters ---------- sample_name : str Name of the sample to plot. title : str, optional Title of the plot. mineral_sat_type : str, default "affinity" Metric for mineral saturation state to plot. Can be "affinity" or "logQoverK". colors : list of two str, default ["blue", "orange"] Sets the color of the bars representing supersaturated and undersaturated states, respectively. save_as : str, optional Provide a filename to save this figure. Filetype of saved figure is determined by `save_format`. Note: interactive plots can be saved by clicking the 'Download plot' button in the plot's toolbar. save_format : str, default "png" Desired format of saved or downloaded figure. Can be 'png', 'jpg', 'jpeg', 'webp', 'svg', 'pdf', 'eps', 'json', or 'html'. If 'html', an interactive plot will be saved. Only 'png', 'svg', 'jpeg', and 'webp' can be downloaded with the 'download as' button in the toolbar of an interactive plot. save_scale : numeric, default 1 Multiply title/legend/axis/canvas sizes by this factor when saving the figure. interactive : bool, default True Return an interactive plot if True or a static plot if False. """ if sample_name not in self.report.index: msg = ("Could not find '{}'".format(sample_name)+" among sample " "names in the speciation report. Sample names include " "{}".format(list(self.report.index))) err_handler.raise_exception(msg) if isinstance(self.sample_data[sample_name].get('mineral_sat', None), pd.DataFrame): mineral_data = self.sample_data[sample_name]['mineral_sat'][mineral_sat_type].astype(float).sort_values(ascending=False) x = mineral_data.index else: msg = ("This sample does not have mineral saturation state data." "To generate this data, ensure get_mineral_sat=True when " "running speciate(), or ensure this sample has " "mineral-forming basis species.") self.err_handler.raise_exception(msg) color_list = [colors[0] if m >= 0 else colors[1] for m in mineral_data] if mineral_sat_type == "affinity": ylabel = 'affinity, kcal/mol' if mineral_sat_type == "logQoverK": ylabel = 'logQ/K' if title==None: title = sample_name + " mineral saturation index" df = pd.DataFrame(mineral_data) fig = px.bar(df, x=df.index, y="affinity", height=plot_height*ppi, width=plot_width*ppi, labels={'affinity': ylabel}, template="simple_white") fig.update_traces(hovertemplate = "%{x} <br>"+ylabel+": %{y}", marker_color=color_list) fig.update_layout(xaxis_tickangle=-45, xaxis_title=None, title={'text':title, 'x':0.5, 'xanchor':'center'}, margin={"t":40}, xaxis={'fixedrange':True}, yaxis={'fixedrange':True, 'exponentformat':'power'}) save_as, save_format = self.__save_figure(fig, save_as, save_format, save_scale, plot_width, plot_height, ppi) config = {'displaylogo': False, 'modeBarButtonsToRemove': ['zoom2d', 'pan2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d', 'toggleSpikelines'], 'toImageButtonOptions': { 'format': save_format, # one of png, svg, jpeg, webp 'filename': save_as, 'height': plot_height*ppi, 'width': plot_width*ppi, 'scale': save_scale, }, } if not interactive: config['staticPlot'] = True fig.show(config=config) def plot_solid_solutions(self, sample, title=None, width=0.9, colormap='WORM', affinity_plot=True, affinity_plot_colors=['blue', 'orange'], plot_width=4, plot_height=4, ppi=122, save_as=None, save_format=None, save_scale=1, interactive=True)-
Plot fractions of minerals of hypothetical solid solutions in a sample.
Parameters
sample:str- Name of the sample.
title:str, optional- Title of the plot.
width:float, default0.9- Width of bars. No space between bars if width=1.0.
colormap:str, default"WORM"- Name of the colormap to color the scatterpoints. Accepts "WORM", "colorblind", or matplotlib colormaps. See https://matplotlib.org/stable/tutorials/colors/colormaps.html The "colorblind" colormap is referenced from Wong, B. Points of view: Color blindness. Nat Methods 8, 441 (2011). https://doi.org/10.1038/nmeth.1618
affinity_plot:bool, defaultTrue- Include the affinity subplot?
affinity_plot_colors:listoftwo str, default["blue", "orange"]- Colors indicating positive and negative values in the affinity subplot, respectively.
plot_width,plot_height:numeric, default4 by 3- Width and height of the plot, in inches. Size of interactive plots
is also determined by pixels per inch, set by the parameter
ppi. ppi:numeric, default122- Pixels per inch. Along with
plot_widthandplot_height, determines the size of interactive plots. save_as:str, optional- Provide a filename to save this figure. Filetype of saved figure is
determined by
save_format. Note: interactive plots can be saved by clicking the 'Download plot' button in the plot's toolbar. save_format:str, default"png"- Desired format of saved or downloaded figure. Can be 'png', 'jpg', 'jpeg', 'webp', 'svg', 'pdf', 'eps', 'json', or 'html'. If 'html', an interactive plot will be saved. Only 'png', 'svg', 'jpeg', and 'webp' can be downloaded with the 'download as' button in the toolbar of an interactive plot.
save_scale:numeric, default1- Multiply title/legend/axis/canvas sizes by this factor when saving the figure.
interactive:bool, defaultTrue- Return an interactive plot if True or a static plot if False.
Expand source code
def plot_solid_solutions(self, sample, title=None, width=0.9, colormap="WORM", affinity_plot=True, affinity_plot_colors=["blue", "orange"], plot_width=4, plot_height=4, ppi=122, save_as=None, save_format=None, save_scale=1, interactive=True): """ Plot fractions of minerals of hypothetical solid solutions in a sample. Parameters ---------- sample : str Name of the sample. title : str, optional Title of the plot. width : float, default 0.9 Width of bars. No space between bars if width=1.0. colormap : str, default "WORM" Name of the colormap to color the scatterpoints. Accepts "WORM", "colorblind", or matplotlib colormaps. See https://matplotlib.org/stable/tutorials/colors/colormaps.html The "colorblind" colormap is referenced from Wong, B. Points of view: Color blindness. Nat Methods 8, 441 (2011). https://doi.org/10.1038/nmeth.1618 affinity_plot : bool, default True Include the affinity subplot? affinity_plot_colors : list of two str, default ["blue", "orange"] Colors indicating positive and negative values in the affinity subplot, respectively. plot_width, plot_height : numeric, default 4 by 3 Width and height of the plot, in inches. Size of interactive plots is also determined by pixels per inch, set by the parameter `ppi`. ppi : numeric, default 122 Pixels per inch. Along with `plot_width` and `plot_height`, determines the size of interactive plots. save_as : str, optional Provide a filename to save this figure. Filetype of saved figure is determined by `save_format`. Note: interactive plots can be saved by clicking the 'Download plot' button in the plot's toolbar. save_format : str, default "png" Desired format of saved or downloaded figure. Can be 'png', 'jpg', 'jpeg', 'webp', 'svg', 'pdf', 'eps', 'json', or 'html'. If 'html', an interactive plot will be saved. Only 'png', 'svg', 'jpeg', and 'webp' can be downloaded with the 'download as' button in the toolbar of an interactive plot. save_scale : numeric, default 1 Multiply title/legend/axis/canvas sizes by this factor when saving the figure. interactive : bool, default True Return an interactive plot if True or a static plot if False. """ if sample not in self.sample_data.keys(): msg = ("The sample "+sample+" was not found in this speciation dataset." " Samples with solid solutions in this dataset include:"+str([s for s in self.sample_data.keys() if "solid_solutions" in self.sample_data[s].keys()])) self.err_handler.raise_exception(msg) try: self.sample_data[sample]["solid_solutions"] except: msg = ("Results for solid solutions could not be found for this " "sample. Samples with solid solutions in this speciation " "dataset include:"+str([s for s in self.sample_data.keys() if "solid_solutions" in self.sample_data[s].keys()])) self.err_handler.raise_exception(msg) if title == None: title = "Hypothetical solid solutions in " + sample df_full = copy.deepcopy(self.sample_data[sample]["solid_solutions"]) df = copy.deepcopy(df_full.dropna(subset=['x'])) df = df[df['x'] != 0] unique_minerals = self.__unique(df["mineral"]) # get colormap colors = get_colors(colormap, len(unique_minerals)) # convert rgba to hex colors = [matplotlib.colors.rgb2hex(c) for c in colors] # map each species to its color, e.g., # {'CO2': '#000000', 'HCO3-': '#1699d3', 'Other': '#736ca8'} dict_minerals_color = {sp:color for sp,color in zip(unique_minerals, colors)} solid_solutions = list(dict.fromkeys(df["solid solution"])) df_ss_only = df_full[df_full["x"].isnull()] mineral_dict = {m:[] for m in unique_minerals} for ss in solid_solutions: for m in unique_minerals: df_sub = df.loc[df["solid solution"] == ss,] frac = df_sub.loc[df_sub["mineral"] == m, "x"] if len(frac) > 0: mineral_dict[m] = mineral_dict[m] + list(frac) else: mineral_dict[m].append(0) if affinity_plot: rows = 2 specs = [[{"type": "bar"}], [{"type": "bar"}]] else: rows = 1 specs = [[{"type": "bar"}]] fig = make_subplots( rows=rows, cols=1, specs=specs, vertical_spacing = 0.05 ) # subplot 1 for m in unique_minerals[::-1]: fig.add_trace(go.Bar(name=m, x=solid_solutions, y=mineral_dict[m], marker_color=dict_minerals_color[m]), row=1, col=1) # subplot 2 if affinity_plot: fig.add_trace(go.Bar(name="ss", x=solid_solutions, y=df_ss_only["Aff, kcal"], marker_color=[affinity_plot_colors[0] if val > 0 else affinity_plot_colors[1] for val in df_ss_only["Aff, kcal"]], showlegend=False), row=2, col=1) fig.update_layout(barmode='stack', xaxis_tickangle=-45, xaxis_title=None, legend_title=None, title={'text':title, 'x':0.5, 'xanchor':'center'}, autosize=False, width=plot_width*ppi, height=plot_height*ppi, margin={"t": 40}, bargap=0, xaxis={'fixedrange':True}, yaxis={'fixedrange':True}, template="simple_white") fig.update_xaxes(tickangle=-45) fig['layout']['yaxis']['title']='Mole Fraction' if affinity_plot: fig['layout']['yaxis2']['title']='Affinity, kcal/mol' fig.update_xaxes(showticklabels=False) # hide all the xticks fig.update_xaxes(showticklabels=True, row=2, col=1) save_as, save_format = self.__save_figure(fig, save_as, save_format, save_scale, plot_width, plot_height, ppi) config = {'displaylogo': False, 'modeBarButtonsToRemove': ['zoom2d', 'pan2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', 'autoScale2d', 'resetScale2d', 'toggleSpikelines'], 'toImageButtonOptions': { 'format': save_format, # one of png, svg, jpeg, webp 'filename': save_as, 'height': plot_height*ppi, 'width': plot_width*ppi, 'scale': save_scale, }, } if not interactive: config['staticPlot'] = True fig.show(config=config) def save(self, filename, messages=True)-
Save the speciation as a '.speciation' file to your current working directory. This file can be loaded with
AqEquil.load(filename).Parameters
filename:str- The desired name of the file.
messages:str- Print a message confirming the save?
Expand source code
def save(self, filename, messages=True): """ Save the speciation as a '.speciation' file to your current working directory. This file can be loaded with `AqEquil.load(filename)`. Parameters ---------- filename : str The desired name of the file. messages : str Print a message confirming the save? """ if filename[-11:] != '.speciation': filename = filename + '.speciation' with open(filename, 'wb') as handle: dill.dump(self, handle, protocol=dill.HIGHEST_PROTOCOL) if messages: print("Saved as '{}'".format(filename)) def scatterplot(self, x='pH', y='Temperature', title=None, plot_width=4, plot_height=3, ppi=122, fill_alpha=0.7, point_size=10, colormap='WORM', save_as=None, save_format=None, save_scale=1, interactive=True)-
Vizualize two or more sample variables with a scatterplot.
Parameters
x,y:str, defaultfor x is "pH", defaultfor y is "Temperature"- Names of the variables to plot against each other. Valid variables
are columns in the speciation report.
ycan be a list of of variable names for a multi-series scatterplot. title:str, optional- Title of the plot.
plot_width,plot_height:numeric, default4 by 3- Width and height of the plot, in inches. Size of interactive plots
is also determined by pixels per inch, set by the parameter
ppi. ppi:numeric, default122- Pixels per inch. Along with
plot_widthandplot_height, determines the size of interactive plots. fill_alpha:numeric, default0.7- Transparency of scatterpoint area fill.
point_size:numeric, default10- Size of scatterpoints.
colormap:str, default"WORM"- Name of the colormap to color the scatterpoints. Accepts "WORM", "colorblind", or matplotlib colormaps. See https://matplotlib.org/stable/tutorials/colors/colormaps.html The "colorblind" colormap is referenced from Wong, B. Points of view: Color blindness. Nat Methods 8, 441 (2011). https://doi.org/10.1038/nmeth.1618
save_as:str, optional- Provide a filename to save this figure. Filetype of saved figure is
determined by
save_format. Note: interactive plots can be saved by clicking the 'Download plot' button in the plot's toolbar. save_format:str, default"png"- Desired format of saved or downloaded figure. Can be 'png', 'jpg', 'jpeg', 'webp', 'svg', 'pdf', 'eps', 'json', or 'html'. If 'html', an interactive plot will be saved. Only 'png', 'svg', 'jpeg', and 'webp' can be downloaded with the 'download as' button in the toolbar of an interactive plot.
save_scale:numeric, default1- Multiply title/legend/axis/canvas sizes by this factor when saving the figure.
interactive:bool, defaultTrue- Return an interactive plot if True or a static plot if False.
Expand source code
def scatterplot(self, x="pH", y="Temperature", title=None, plot_width=4, plot_height=3, ppi=122, fill_alpha=0.7, point_size=10, colormap="WORM", save_as=None, save_format=None, save_scale=1, interactive=True): """ Vizualize two or more sample variables with a scatterplot. Parameters ---------- x, y : str, default for x is "pH", default for y is "Temperature" Names of the variables to plot against each other. Valid variables are columns in the speciation report. `y` can be a list of of variable names for a multi-series scatterplot. title : str, optional Title of the plot. plot_width, plot_height : numeric, default 4 by 3 Width and height of the plot, in inches. Size of interactive plots is also determined by pixels per inch, set by the parameter `ppi`. ppi : numeric, default 122 Pixels per inch. Along with `plot_width` and `plot_height`, determines the size of interactive plots. fill_alpha : numeric, default 0.7 Transparency of scatterpoint area fill. point_size : numeric, default 10 Size of scatterpoints. colormap : str, default "WORM" Name of the colormap to color the scatterpoints. Accepts "WORM", "colorblind", or matplotlib colormaps. See https://matplotlib.org/stable/tutorials/colors/colormaps.html The "colorblind" colormap is referenced from Wong, B. Points of view: Color blindness. Nat Methods 8, 441 (2011). https://doi.org/10.1038/nmeth.1618 save_as : str, optional Provide a filename to save this figure. Filetype of saved figure is determined by `save_format`. Note: interactive plots can be saved by clicking the 'Download plot' button in the plot's toolbar. save_format : str, default "png" Desired format of saved or downloaded figure. Can be 'png', 'jpg', 'jpeg', 'webp', 'svg', 'pdf', 'eps', 'json', or 'html'. If 'html', an interactive plot will be saved. Only 'png', 'svg', 'jpeg', and 'webp' can be downloaded with the 'download as' button in the toolbar of an interactive plot. save_scale : numeric, default 1 Multiply title/legend/axis/canvas sizes by this factor when saving the figure. interactive : bool, default True Return an interactive plot if True or a static plot if False. """ if not isinstance(y, list): y = [y] if not isinstance(x, str): self.err_handler.raise_exception("x must be a string.") x_col = self.lookup(x) try: xsubheader = x_col.columns.get_level_values(1)[0] except: msg = ("Could not find '{}' ".format(x)+"in the speciation " "report. Available variables include " "{}".format(list(set(self.report.columns.get_level_values(0))))) self.err_handler.raise_exception(msg) try: x_plot = [float(x0[0]) if x0[0] != 'NA' else float("nan") for x0 in x_col.values.tolist()] except: msg = ("One or more the values belonging to " "'{}' are non-numeric and cannot be plotted.".format(x_col.columns.get_level_values(0)[0])) self.err_handler.raise_exception(msg) try: xunit_type, xunit = self.__get_unit_info(xsubheader) except: xunit_type = "" xunit = "" colors = get_colors(colormap, len(y), alpha=fill_alpha) for i, yi in enumerate(y): y_col = self.lookup(yi) try: subheader = y_col.columns.get_level_values(1)[0] except: msg = ("Could not find '{}' ".format(yi)+"in the speciation " "report. Available variables include " "{}".format(list(set(self.report.columns.get_level_values(0))))) self.err_handler.raise_exception(msg) try: unit_type, unit = self.__get_unit_info(subheader) except: unit_type = "" unit = "" try: y_plot = [float(y0[0]) if y0[0] != 'NA' else float("nan") for y0 in y_col.values.tolist()] except: msg = ("One or more the values belonging to " "'{}' are non-numeric and cannot be plotted.".format(y_col.columns.get_level_values(0)[0])) self.err_handler.raise_exception(msg) if i == 0: subheader_previous = subheader unit_type_previous = unit_type if unit_type != unit_type_previous and i != 0: msg = ("{} has a different unit of measurement ".format(yi)+"" "({}) than {} ({}). ".format(unit_type, yi_previous, unit_type_previous)+"" "Plotted variables must share units.") self.err_handler.raise_exception(msg) elif "activity" in subheader.lower() and "molality" in subheader_previous.lower(): msg = ("{} has a different unit of measurement ".format(yi)+"" "({}) than {} ({}). ".format("activity", yi_previous, "molality")+"" "Plotted variables must share units.") self.err_handler.raise_exception(msg) elif "molality" in subheader.lower() and "activity" in subheader_previous.lower(): msg = ("{} has a different unit of measurement ".format(yi)+"" "({}) than {} ({}). ".format("molality", yi_previous, "activity")+"" "Plotted variables must share units.") self.err_handler.raise_exception(msg) yi_previous = copy.deepcopy(yi) unit_type_previous = copy.deepcopy(unit_type) subheader_previous = copy.deepcopy(subheader) if len(y) > 1: if unit != "": ylabel = "{} [{}]".format(unit_type, unit) else: ylabel = unit_type else: if 'pH' in y: ylabel = 'pH' elif 'Temperature' in y: ylabel = 'Temperature [°C]' else: y_formatted = html_chemname_format_AqEquil(y[0]) if unit != "": ylabel = "{} {} [{}]".format(y_formatted, unit_type, unit) else: ylabel = "{} {}".format(y_formatted, unit_type) if x == 'pH': xlabel = 'pH' elif x == 'Temperature': xlabel = 'Temperature [°C]' else: x_formatted = html_chemname_format_AqEquil(x) if xunit != "": xlabel = "{} {} [{}]".format(x_formatted, xunit_type, xunit) else: xlabel = "{} {}".format(x_formatted, xunit_type) # convert rgba to hex colors = [matplotlib.colors.rgb2hex(c) for c in colors] # map each species to its color, e.g., # {'CO2': '#000000', 'HCO3-': '#1699d3', 'Other': '#736ca8'} dict_species_color = {sp:color for sp,color in zip(y, colors)} # html format color dict key names dict_species_color = {html_chemname_format_AqEquil(k):v for k,v in dict_species_color.items()} df = self.lookup(["name", x]+y).copy() df.loc[:, "name"] = df.index df.columns = df.columns.get_level_values(0) df = pd.melt(df, id_vars=["name", x], value_vars=y) df = df.rename(columns={"Sample": "y_variable", "value": "y_value"}) if (unit_type == "energy supply" or unit_type == "affinity") and self.reactions_for_plotting!=None: # get formatted reactions to display if not isinstance(self.reactions_for_plotting, pd.DataFrame): self.reactions_for_plotting = self.show_redox_reactions(formatted=True, charge_sign_at_end=False, show=False, simplify=True) y_find = [yi.replace("_energy", "").replace("_affinity", "") for yi in y] rxns = self.reactions_for_plotting.loc[y_find, :]["reaction"].tolist() rxn_dict = {rxn_name:rxn for rxn_name,rxn in zip(y, rxns)} if len(y) == 1: ylabel = "{}<br>{} [{}]".format(html_chemname_format_AqEquil(y_find[0]), unit_type, unit) df["formatted_rxn"] = df["y_variable"].map(rxn_dict) else: df["formatted_rxn"] = "" df['y_variable'] = df['y_variable'].apply(html_chemname_format_AqEquil) fig = px.scatter(df, x=x, y="y_value", color="y_variable", hover_data=[x, "y_value", "y_variable", "name", "formatted_rxn"], width=plot_width*ppi, height=plot_height*ppi, labels={x: xlabel, "y_value": ylabel}, category_orders={"species": y}, color_discrete_map=dict_species_color, opacity=fill_alpha, custom_data=['name', 'formatted_rxn'], template="simple_white") fig.update_traces(marker=dict(size=point_size), hovertemplate = "%{customdata[0]}<br>"+xlabel+": %{x} <br>"+ylabel+": %{y}<br>%{customdata[1]}") fig.update_layout(legend_title=None, title={'text':title, 'x':0.5, 'xanchor':'center'}, margin={"t": 40}, yaxis={'exponentformat':'power'}) if len(y) == 1: fig.update_layout(showlegend=False) save_as, save_format = self.__save_figure(fig, save_as, save_format, save_scale, plot_width, plot_height, ppi) config = {'displaylogo': False, 'scrollZoom': True, 'modeBarButtonsToRemove': ['select2d', 'lasso2d', 'toggleSpikelines', 'resetScale2d'], 'toImageButtonOptions': { 'format': save_format, # one of png, svg, jpeg, webp 'filename': save_as, 'height': plot_height*ppi, 'width': plot_width*ppi, 'scale': save_scale, }, } if not interactive: config['staticPlot'] = True fig.show(config=config)