Source code for pymatgen.io.qchem_io.outputs

# coding: utf-8
# Copyright (c) Pymatgen Development Team.
# Distributed under the terms of the MIT License.

import re
import logging
import os
import numpy as np
import math

from monty.io import zopen
from monty.json import jsanitize
from monty.json import MSONable
from pymatgen.core import Molecule

from .utils import read_table_pattern, read_pattern, process_parsed_coords

__author__ = "Samuel Blau, Brandon Wood, Shyam Dwaraknath"
__copyright__ = "Copyright 2018, The Materials Project"
__version__ = "0.1"

logger = logging.getLogger(__name__)


[docs]class QCOutput(MSONable): """ Class to parse QChem output files. """ def __init__(self, filename): """ Args: filename (str): Filename to parse """ self.filename = filename self.data = {} self.data["errors"] = [] self.text = "" with zopen(filename, 'rt') as f: self.text = f.read() # Check if output file contains multiple output files. If so, print an error message and exit self.data["multiple_outputs"] = read_pattern( self.text, { "key": r"Job\s+\d+\s+of\s+(\d+)\s+" }, terminate_on_match=True).get('key') if not (self.data.get('multiple_outputs') == None or self.data.get('multiple_outputs') == [['1']]): print( "ERROR: multiple calculation outputs found in file " + filename + ". Please instead call QCOutput.mulitple_outputs_from_file(QCOutput,'" + filename + "')") print("Exiting...") exit() # Parse the molecular details: charge, multiplicity, # species, and initial geometry. self._read_charge_and_multiplicity() if self.data.get('charge') != None: self._read_species_and_inital_geometry() # Check if calculation finished self.data["completion"] = read_pattern( self.text, { "key": r"Thank you very much for using Q-Chem.\s+Have a nice day." }, terminate_on_match=True).get('key') # If the calculation finished, parse the job time. if self.data.get('completion', []): temp_timings = read_pattern( self.text, { "key": r"Total job time\:\s*([\d\-\.]+)s\(wall\)\,\s*([\d\-\.]+)s\(cpu\)" }).get('key') if temp_timings != None: self.data["walltime"] = float(temp_timings[0][0]) self.data["cputime"] = float(temp_timings[0][1]) else: self.data["walltime"] = 'nan' self.data["cputime"] = 'nan' # Check if calculation is unrestricted self.data["unrestricted"] = read_pattern( self.text, { "key": r"A(?:n)*\sunrestricted[\s\w\-]+SCF\scalculation\swill\sbe" }, terminate_on_match=True).get('key') # Check if calculation uses GEN_SCFMAN, multiple potential output formats self.data["using_GEN_SCFMAN"] = read_pattern( self.text, { "key": r"\s+GEN_SCFMAN: A general SCF calculation manager" }, terminate_on_match=True).get('key') if not self.data["using_GEN_SCFMAN"]: self.data["using_GEN_SCFMAN"] = read_pattern( self.text, { "key": r"\s+General SCF calculation program by" }, terminate_on_match=True).get('key') # Check if the SCF failed to converge if read_pattern( self.text, { "key": r"SCF failed to converge" }, terminate_on_match=True).get('key') == [[]]: self.data["errors"] += ["SCF_failed_to_converge"] # Parse the SCF self._read_SCF() # Parse the Mulliken charges self._read_mulliken() # Parse the final energy temp_final_energy = read_pattern( self.text, { "key": r"Final\senergy\sis\s+([\d\-\.]+)" }).get('key') if temp_final_energy == None: self.data["final_energy"] = None else: self.data["final_energy"] = float(temp_final_energy[0][0]) # Parse the S2 values in the case of an unrestricted calculation if self.data.get('unrestricted', []): temp_S2 = read_pattern(self.text, { "key": r"<S\^2>\s=\s+([\d\-\.]+)" }).get('key') if temp_S2 == None: self.data["S2"] = None elif len(temp_S2) == 1: self.data["S2"] = float(temp_S2[0][0]) else: real_S2 = np.zeros(len(temp_S2)) for ii, entry in enumerate(temp_S2): real_S2[ii] = float(entry[0]) self.data["S2"] = real_S2 # Check if the calculation is a geometry optimization. If so, parse the relevant output self.data["optimization"] = read_pattern( self.text, { "key": r"(?i)\s*job(?:_)*type\s*(?:=)*\s*opt" }).get('key') if self.data.get('optimization', []): temp_energy_trajectory = read_pattern( self.text, { "key": r"\sEnergy\sis\s+([\d\-\.]+)" }).get('key') if temp_energy_trajectory == None: self.data["energy_trajectory"] = [] else: real_energy_trajectory = np.zeros(len(temp_energy_trajectory)) for ii, entry in enumerate(temp_energy_trajectory): real_energy_trajectory[ii] = float(entry[0]) self.data["energy_trajectory"] = real_energy_trajectory self._read_optimized_geometry() # Then, if no optimized geometry or z-matrix is found, and no errors have been previously # idenfied, check to see if the optimization failed to converge or if Lambda wasn't able # to be determined. Also, if there is an energy trajectory, read the last geometry in the # optimization trajectory for use in the next input file. if len(self.data.get("errors")) == 0 and len( self.data.get('optimized_geometry')) == 0 and len( self.data.get('optimized_zmat')) == 0: self._check_optimization_errors() self._read_last_geometry() # Check if the calculation contains a constraint in an $opt section. self.data["opt_constraint"] = read_pattern(self.text, { "key": r"\$opt\s+CONSTRAINT" }).get('key') if self.data.get('opt_constraint'): temp_constraint = read_pattern( self.text, { "key": r"Constraints and their Current Values\s+Value\s+Constraint\s+(\w+)\:\s+([\d\-\.]+)\s+([\d\-\.]+)\s+([\d\-\.]+)\s+([\d\-\.]+)\s+([\d\-\.]+)\s+([\d\-\.]+)" }).get('key') if temp_constraint != None: self.data["opt_constraint"] = temp_constraint[0] if float(self.data.get('opt_constraint')[5]) != float( self.data.get('opt_constraint')[6]): if abs(float(self.data.get('opt_constraint')[5])) != abs( float(self.data.get('opt_constraint')[6])): raise ValueError( "ERROR: Opt section value and constraint should be the same!" ) elif abs(float( self.data.get('opt_constraint')[5])) not in [ 0.0, 180.0 ]: raise ValueError( "ERROR: Opt section value and constraint can only differ by a sign at 0.0 and 180.0!" ) # Check if the calculation is a frequency analysis. If so, parse the relevant output self.data["frequency_job"] = read_pattern( self.text, { "key": r"(?i)\s*job(?:_)*type\s*(?:=)*\s*freq" }, terminate_on_match=True).get('key') if self.data.get('frequency_job', []): self._read_frequency_data() # If the calculation did not finish and no errors have been identified yet, check for other errors if not self.data.get('completion', []) and self.data.get("errors") == []: self._check_completion_errors()
[docs] @staticmethod def multiple_outputs_from_file(cls, filename, keep_sub_files=True): """ Parses a QChem output file with multiple calculations 1.) Seperates the output into sub-files e.g. qcout -> qcout.0, qcout.1, qcout.2 ... qcout.N a.) Find delimeter for multiple calcualtions b.) Make seperate output sub-files 2.) Creates seperate QCCalcs for each one from the sub-files """ to_return = [] with zopen(filename, 'rt') as f: text = re.split('\s*(?:Running\s+)*Job\s+\d+\s+of\s+\d+\s+', f.read()) if text[0] == '': text = text[1:] for i, sub_text in enumerate(text): temp = open(filename + '.' + str(i), 'w') temp.write(sub_text) temp.close() tempOutput = cls(filename + '.' + str(i)) to_return.append(tempOutput) if not keep_sub_files: os.remove(filename + '.' + str(i)) return to_return
def _read_charge_and_multiplicity(self): """ Parses charge and multiplicity. """ temp_charge = read_pattern( self.text, { "key": r"\$molecule\s+([\-\d]+)\s+\d" }, terminate_on_match=True).get('key') if temp_charge != None: self.data["charge"] = int(temp_charge[0][0]) else: temp_charge = read_pattern( self.text, { "key": r"Sum of atomic charges \=\s+([\d\-\.\+]+)" }, terminate_on_match=True).get('key') if temp_charge == None: self.data["charge"] = None else: self.data["charge"] = int(float(temp_charge[0][0])) temp_multiplicity = read_pattern( self.text, { "key": r"\$molecule\s+[\-\d]+\s+(\d)" }, terminate_on_match=True).get('key') if temp_multiplicity != None: self.data["multiplicity"] = int(temp_multiplicity[0][0]) else: temp_multiplicity = read_pattern( self.text, { "key": r"Sum of spin\s+charges \=\s+([\d\-\.\+]+)" }, terminate_on_match=True).get('key') if temp_multiplicity == None: self.data["multiplicity"] = 1 else: self.data["multiplicity"] = int( float(temp_multiplicity[0][0])) + 1 def _read_species_and_inital_geometry(self): """ Parses species and initial geometry. """ header_pattern = r"Standard Nuclear Orientation \(Angstroms\)\s+I\s+Atom\s+X\s+Y\s+Z\s+-+" table_pattern = r"\s*\d+\s+([a-zA-Z]+)\s*([\d\-\.]+)\s*([\d\-\.]+)\s*([\d\-\.]+)\s*" footer_pattern = r"\s*-+" temp_geom = read_table_pattern(self.text, header_pattern, table_pattern, footer_pattern) if temp_geom == None or len(temp_geom) == 0: self.data["species"] = None self.data["initial_geometry"] = None self.data["initial_molecule"] = None else: temp_geom = temp_geom[0] species = [] geometry = np.zeros(shape=(len(temp_geom), 3), dtype=float) for ii, entry in enumerate(temp_geom): species += [entry[0]] for jj in range(3): geometry[ii, jj] = float(entry[jj + 1]) self.data["species"] = species self.data["initial_geometry"] = geometry self.data["initial_molecule"] = Molecule( species=species, coords=geometry, charge=self.data.get('charge'), spin_multiplicity=self.data.get('multiplicity')) def _read_SCF(self): """ Parses both old and new SCFs. """ if self.data.get('using_GEN_SCFMAN', []): if "SCF_failed_to_converge" in self.data.get("errors"): footer_pattern = r"^\s*gen_scfman_exception: SCF failed to converge" else: footer_pattern = r"^\s*\-+\n" header_pattern = r"^\s*\-+\s+Cycle\s+Energy\s+(?:(?:DIIS)*\s+[Ee]rror)*(?:RMS Gradient)*\s+\-+(?:\s*\-+\s+OpenMP\s+Integral\s+computing\s+Module\s+(?:Release:\s+version\s+[\d\-\.]+\,\s+\w+\s+[\d\-\.]+\, Q-Chem Inc\. Pittsburgh\s+)*\-+)*\n" table_pattern = r"(?:\s*Nonlocal correlation = [\d\-\.]+e[\d\-]+)*(?:\s*Inaccurate integrated density:\n\s+Number of electrons\s+=\s+[\d\-\.]+\n\s+Numerical integral\s+=\s+[\d\-\.]+\n\s+Relative error\s+=\s+[\d\-\.]+\s+\%\n)*\s*\d+\s+([\d\-\.]+)\s+([\d\-\.]+)e([\d\-\.\+]+)(?:\s+Convergence criterion met)*(?:\s+Preconditoned Steepest Descent)*(?:\s+Roothaan Step)*(?:\s+(?:Normal\s+)*BFGS [Ss]tep)*(?:\s+LineSearch Step)*(?:\s+Line search: overstep)*(?:\s+Descent step)*" else: if "SCF_failed_to_converge" in self.data.get("errors"): footer_pattern = r"^\s*\d+\s*[\d\-\.]+\s+[\d\-\.]+E[\d\-\.]+\s+Convergence\s+failure\n" else: footer_pattern = r"^\s*\-+\n" header_pattern = r"^\s*\-+\s+Cycle\s+Energy\s+DIIS Error\s+\-+\n" table_pattern = r"(?:\s*Inaccurate integrated density:\n\s+Number of electrons\s+=\s+[\d\-\.]+\n\s+Numerical integral\s+=\s+[\d\-\.]+\n\s+Relative error\s+=\s+[\d\-\.]+\s+\%\n)*\s*\d+\s*([\d\-\.]+)\s+([\d\-\.]+)E([\d\-\.\+]+)(?:\s*\n\s*cpu\s+[\d\-\.]+\swall\s+[\d\-\.]+)*(?:\nin dftxc\.C, eleTot sum is:[\d\-\.]+, tauTot is\:[\d\-\.]+)*(?:\s+Convergence criterion met)*(?:\s+Done RCA\. Switching to DIIS)*(?:\n\s*Warning: not using a symmetric Q)*(?:\nRecomputing EXC\s*[\d\-\.]+\s*[\d\-\.]+\s*[\d\-\.]+(?:\s*\nRecomputing EXC\s*[\d\-\.]+\s*[\d\-\.]+\s*[\d\-\.]+)*)*" temp_scf = read_table_pattern(self.text, header_pattern, table_pattern, footer_pattern) real_scf = [] for one_scf in temp_scf: temp = np.zeros(shape=(len(one_scf), 2)) for ii, entry in enumerate(one_scf): temp[ii, 0] = float(entry[0]) temp[ii, 1] = float(entry[1]) * 10**float(entry[2]) real_scf += [temp] self.data["SCF"] = real_scf def _read_mulliken(self): """ Parses Mulliken charges. Also parses spins given an unrestricted SCF. """ if self.data.get('unrestricted', []): header_pattern = r"\-+\s+Ground-State Mulliken Net Atomic Charges\s+Atom\s+Charge \(a\.u\.\)\s+Spin\s\(a\.u\.\)\s+\-+" table_pattern = r"\s+\d+\s\w+\s+([\d\-\.]+)\s+([\d\-\.]+)" footer_pattern = r"\s\s\-+\s+Sum of atomic charges" else: header_pattern = r"\-+\s+Ground-State Mulliken Net Atomic Charges\s+Atom\s+Charge \(a\.u\.\)\s+\-+" table_pattern = r"\s+\d+\s\w+\s+([\d\-\.]+)" footer_pattern = r"\s\s\-+\s+Sum of atomic charges" temp_mulliken = read_table_pattern(self.text, header_pattern, table_pattern, footer_pattern) real_mulliken = [] for one_mulliken in temp_mulliken: if self.data.get('unrestricted', []): temp = np.zeros(shape=(len(one_mulliken), 2)) for ii, entry in enumerate(one_mulliken): temp[ii, 0] = float(entry[0]) temp[ii, 1] = float(entry[1]) else: temp = np.zeros(len(one_mulliken)) for ii, entry in enumerate(one_mulliken): temp[ii] = float(entry[0]) real_mulliken += [temp] self.data["Mulliken"] = real_mulliken def _read_optimized_geometry(self): """ Parses optimized XYZ coordinates. If not present, parses optimized Z-matrix. """ header_pattern = r"\*+\s+OPTIMIZATION\s+CONVERGED\s+\*+\s+\*+\s+Coordinates \(Angstroms\)\s+ATOM\s+X\s+Y\s+Z" table_pattern = r"\s+\d+\s+\w+\s+([\d\-\.]+)\s+([\d\-\.]+)\s+([\d\-\.]+)" footer_pattern = r"\s+Z-matrix Print:" parsed_optimized_geometry = read_table_pattern( self.text, header_pattern, table_pattern, footer_pattern) if parsed_optimized_geometry == [] or None: self.data["optimized_geometry"] = [] header_pattern = r"^\s+\*+\s+OPTIMIZATION CONVERGED\s+\*+\s+\*+\s+Z-matrix\s+Print:\s+\$molecule\s+[\d\-]+\s+[\d\-]+\n" table_pattern = r"\s*(\w+)(?:\s+(\d+)\s+([\d\-\.]+)(?:\s+(\d+)\s+([\d\-\.]+)(?:\s+(\d+)\s+([\d\-\.]+))*)*)*(?:\s+0)*" footer_pattern = r"^\$end\n" self.data["optimized_zmat"] = read_table_pattern( self.text, header_pattern, table_pattern, footer_pattern) else: self.data["optimized_geometry"] = process_parsed_coords( parsed_optimized_geometry[0]) if self.data.get('charge') != None: self.data["molecule_from_optimized_geometry"] = Molecule( species=self.data.get('species'), coords=self.data.get('optimized_geometry'), charge=self.data.get('charge'), spin_multiplicity=self.data.get('multiplicity')) def _read_last_geometry(self): """ Parses the last geometry from an optimization trajectory for use in a new input file. """ header_pattern = r"\s+Optimization\sCycle:\s+" + \ str(len(self.data.get("energy_trajectory"))) + \ "\s+Coordinates \(Angstroms\)\s+ATOM\s+X\s+Y\s+Z" table_pattern = r"\s+\d+\s+\w+\s+([\d\-\.]+)\s+([\d\-\.]+)\s+([\d\-\.]+)" footer_pattern = r"\s+Point Group\:\s+[\d\w]+\s+Number of degrees of freedom\:\s+\d+" parsed_last_geometry = read_table_pattern( self.text, header_pattern, table_pattern, footer_pattern) if parsed_last_geometry == [] or None: self.data["last_geometry"] = [] else: self.data["last_geometry"] = process_parsed_coords( parsed_last_geometry[0]) if self.data.get('charge') != None: self.data["molecule_from_last_geometry"] = Molecule( species=self.data.get('species'), coords=self.data.get('last_geometry'), charge=self.data.get('charge'), spin_multiplicity=self.data.get('multiplicity')) def _read_frequency_data(self): """ Parses frequencies, enthalpy, entropy, and mode vectors. """ temp_dict = read_pattern( self.text, { "frequencies": r"\s*Frequency:\s+([\d\-\.]+)(?:\s+([\d\-\.]+)(?:\s+([\d\-\.]+))*)*", "enthalpy": r"\s*Total Enthalpy:\s+([\d\-\.]+)\s+kcal/mol", "entropy": r"\s*Total Entropy:\s+([\d\-\.]+)\s+cal/mol\.K" }) if temp_dict.get('enthalpy') == None: self.data['enthalpy'] = [] else: self.data['enthalpy'] = float(temp_dict.get('enthalpy')[0][0]) if temp_dict.get('entropy') == None: self.data['entropy'] = None else: self.data['entropy'] = float(temp_dict.get('entropy')[0][0]) if temp_dict.get('frequencies') == None: self.data['frequencies'] = None else: temp_freqs = [ value for entry in temp_dict.get('frequencies') for value in entry ] freqs = np.zeros(len(temp_freqs) - temp_freqs.count('None')) for ii, entry in enumerate(temp_freqs): if entry != 'None': freqs[ii] = float(entry) self.data['frequencies'] = freqs header_pattern = r"\s*Raman Active:\s+[YESNO]+\s+(?:[YESNO]+\s+)*X\s+Y\s+Z\s+(?:X\s+Y\s+Z\s+)*" table_pattern = r"\s*[a-zA-Z][a-zA-Z\s]\s*([\d\-\.]+)\s*([\d\-\.]+)\s*([\d\-\.]+)\s*(?:([\d\-\.]+)\s*([\d\-\.]+)\s*([\d\-\.]+)\s*(?:([\d\-\.]+)\s*([\d\-\.]+)\s*([\d\-\.]+))*)*" footer_pattern = r"TransDip\s+[\d\-\.]+\s*[\d\-\.]+\s*[\d\-\.]+\s*(?:[\d\-\.]+\s*[\d\-\.]+\s*[\d\-\.]+\s*)*" temp_freq_mode_vecs = read_table_pattern( self.text, header_pattern, table_pattern, footer_pattern) freq_mode_vecs = np.zeros( shape=(len(freqs), len(temp_freq_mode_vecs[0]), 3)) for ii, triple_FMV in enumerate(temp_freq_mode_vecs): for jj, line in enumerate(triple_FMV): for kk, entry in enumerate(line): if entry != 'None': freq_mode_vecs[int(ii * 3 + math.floor(kk / 3)), jj, kk % 3] = float(entry) self.data["frequency_mode_vectors"] = freq_mode_vecs def _check_optimization_errors(self): """ Parses three potential optimization errors: failing to converge within the allowed number of optimization cycles, failure to determine the lamda needed to continue, and inconsistent size of MO files due to a linear dependence in the AO basis. """ if read_pattern( self.text, { "key": r"MAXIMUM OPTIMIZATION CYCLES REACHED" }, terminate_on_match=True).get('key') == [[]]: self.data["errors"] += ["out_of_opt_cycles"] elif read_pattern( self.text, { "key": r"UNABLE TO DETERMINE Lamda IN FormD" }, terminate_on_match=True).get('key') == [[]]: self.data["errors"] += ["unable_to_determine_lamda"] elif read_pattern( self.text, { "key": r"Inconsistent size for SCF MO coefficient file" }, terminate_on_match=True).get('key') == [[]]: self.data["errors"] += ["linear_dependent_basis"] def _check_completion_errors(self): """ Parses four potential errors that can cause jobs to crash: inability to transform coordinates due to a bad symmetric specification, an input file that fails to pass inspection, and errors reading and writing files. """ if read_pattern( self.text, { "key": r"Coordinates do not transform within specified threshold" }, terminate_on_match=True).get('key') == [[]]: self.data["errors"] += ["failed_to_transform_coords"] elif read_pattern( self.text, { "key": r"The Q\-Chem input file has failed to pass inspection" }, terminate_on_match=True).get('key') == [[]]: self.data["errors"] += ["input_file_error"] elif read_pattern( self.text, { "key": r"Error opening input stream" }, terminate_on_match=True).get('key') == [[]]: self.data["errors"] += ["failed_to_read_input"] elif read_pattern( self.text, { "key": r"FileMan error: End of file reached prematurely" }, terminate_on_match=True).get('key') == [[]]: self.data["errors"] += ["IO_error"] else: self.data["errors"] += ["unknown_error"]
[docs] def as_dict(self): d = {} d["data"] = self.data d["text"] = self.text d["filename"] = self.filename return jsanitize(d, strict=True)