Skip to content

Commit

Permalink
Fix IndexError when parsing Hessian from Gaussian frequency job (#3308)
Browse files Browse the repository at this point in the history
* add _parse_hessian method to GaussianOutput

* snake_case vars

* fix typo in test name: paramaters->parameters
  • Loading branch information
janosh committed Sep 13, 2023
1 parent a01c935 commit 173a65b
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 77 deletions.
145 changes: 79 additions & 66 deletions pymatgen/io/gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,16 +133,16 @@ def __init__(
# Determine multiplicity and charge settings
if isinstance(mol, Molecule):
self.charge = charge if charge is not None else mol.charge
nelectrons = mol.charge + mol.nelectrons - self.charge
n_electrons = mol.charge + mol.nelectrons - self.charge
if spin_multiplicity is not None:
self.spin_multiplicity = spin_multiplicity
if (nelectrons + spin_multiplicity) % 2 != 1:
if (n_electrons + spin_multiplicity) % 2 != 1:
raise ValueError(
f"Charge of {self.charge} and spin multiplicity of {spin_multiplicity} is"
" not possible for this molecule"
)
else:
self.spin_multiplicity = 1 if nelectrons % 2 == 0 else 2
self.spin_multiplicity = 1 if n_electrons % 2 == 0 else 2

# Get a title from the molecule name
self.title = title or self._mol.composition.formula
Expand Down Expand Up @@ -777,25 +777,25 @@ def _parse(self, filename):
frequencies = []
read_mo = False
parse_hessian = False
routeline = ""
route_line = ""
standard_orientation = False
parse_bond_order = False
input_structures = []
std_structures = []
geom_orientation = None
opt_structures = []

with zopen(filename, mode="rt") as f:
for line in f:
with zopen(filename, mode="rt") as file:
for line in file:
if parse_stage == 0:
if start_patt.search(line):
parse_stage = 1
elif link0_patt.match(line):
m = link0_patt.match(line)
self.link0[m.group(1)] = m.group(2)
elif route_patt.search(line) or routeline != "":
elif route_patt.search(line) or route_line != "":
if set(line.strip()) == {"-"}:
params = read_route_line(routeline)
params = read_route_line(route_line)
self.functional = params[0]
self.basis_set = params[1]
self.route_parameters = params[2]
Expand All @@ -804,7 +804,7 @@ def _parse(self, filename):
parse_stage = 1
else:
line = line.replace(" ", "", 1).rstrip("\n")
routeline += line
route_line += line
elif parse_stage == 1:
if set(line.strip()) == {"-"} and self.title is None:
self.title = ""
Expand All @@ -828,15 +828,15 @@ def _parse(self, filename):
self.corrections[key] = float(m.group(3))

if read_coord:
[f.readline() for i in range(3)]
line = f.readline()
[file.readline() for i in range(3)]
line = file.readline()
sp = []
coords = []
while set(line.strip()) != {"-"}:
tokens = line.split()
sp.append(Element.from_Z(int(tokens[1])))
coords.append([float(x) for x in tokens[3:6]])
line = f.readline()
line = file.readline()

read_coord = False
if geom_orientation == "input":
Expand All @@ -861,13 +861,13 @@ def _parse(self, filename):
else:
read_eigen = False
self.eigenvalues = {Spin.up: []}
for eigenline in eigen_txt:
if "Alpha" in eigenline:
self.eigenvalues[Spin.up] += [float(e) for e in float_patt.findall(eigenline)]
elif "Beta" in eigenline:
for eigen_line in eigen_txt:
if "Alpha" in eigen_line:
self.eigenvalues[Spin.up] += [float(e) for e in float_patt.findall(eigen_line)]
elif "Beta" in eigen_line:
if Spin.down not in self.eigenvalues:
self.eigenvalues[Spin.down] = []
self.eigenvalues[Spin.down] += [float(e) for e in float_patt.findall(eigenline)]
self.eigenvalues[Spin.down] += [float(e) for e in float_patt.findall(eigen_line)]
eigen_txt = []

# read molecular orbital coefficients
Expand All @@ -887,35 +887,35 @@ def _parse(self, filename):
nMO = 0
end_mo = False
while nMO < self.num_basis_func and not end_mo:
f.readline()
f.readline()
file.readline()
file.readline()
self.atom_basis_labels = []
for i in range(self.num_basis_func):
line = f.readline()
for idx in range(self.num_basis_func):
line = file.readline()

# identify atom and OA labels
m = mo_coeff_name_patt.search(line)
if m.group(1).strip() != "":
iat = int(m.group(2)) - 1
atom_idx = int(m.group(2)) - 1
# atname = m.group(3)
self.atom_basis_labels.append([m.group(4)])
else:
self.atom_basis_labels[iat].append(m.group(4))
self.atom_basis_labels[atom_idx].append(m.group(4))

# MO coefficients
coeffs = [float(c) for c in float_patt.findall(line)]
for j, c in enumerate(coeffs):
mat_mo[spin][i, nMO + j] = c
mat_mo[spin][idx, nMO + j] = c

nMO += len(coeffs)
line = f.readline()
line = file.readline()
# manage pop=regular case (not all MO)
if nMO < self.num_basis_func and (
"Density Matrix:" in line or mo_coeff_patt.search(line)
):
end_mo = True
warnings.warn("POP=regular case, matrix coefficients not complete")
f.readline()
file.readline()

self.eigenvectors = mat_mo
read_mo = False
Expand All @@ -930,11 +930,11 @@ def _parse(self, filename):
[{} for iat in range(len(self.atom_basis_labels))] for j in range(self.num_basis_func)
]
for j in range(self.num_basis_func):
i = 0
for iat, labels in enumerate(self.atom_basis_labels):
idx = 0
for atom_idx, labels in enumerate(self.atom_basis_labels):
for label in labels:
mo[spin][j][iat][label] = self.eigenvectors[spin][i, j]
i += 1
mo[spin][j][atom_idx][label] = self.eigenvectors[spin][idx, j]
idx += 1

self.molecular_orbital = mo

Expand Down Expand Up @@ -974,56 +974,40 @@ def _parse(self, filename):
syms = line.split()[:3]
for ifreq, sym in zip(ifreqs, syms):
frequencies[ifreq]["symmetry"] = sym
line = f.readline()
line = file.readline()

# read normal modes
line = f.readline()
line = file.readline()
while normal_mode_patt.search(line):
values = list(map(float, float_patt.findall(line)))
for i, ifreq in zip(range(0, len(values), 3), ifreqs):
frequencies[ifreq]["mode"].extend(values[i : i + 3])
line = f.readline()
for idx, ifreq in zip(range(0, len(values), 3), ifreqs):
frequencies[ifreq]["mode"].extend(values[idx : idx + 3])
line = file.readline()

parse_freq = False
self.frequencies.append(frequencies)
frequencies = []

elif parse_hessian:
# read Hessian matrix under "Force constants in Cartesian coordinates"
# Hessian matrix is in the input orientation framework
# WARNING : need #P in the route line
if not (input_structures or std_structures):
raise ValueError("Both input_structures and std_structures are empty.")
parse_hessian = False
ndf = 3 * len(input_structures[0])
self.hessian = np.zeros((ndf, ndf))
j_indices = range(5)
jndf = 0
while jndf < ndf:
for i in range(jndf, ndf):
line = f.readline()
vals = re.findall(r"\s*([+-]?\d+\.\d+[eEdD]?[+-]\d+)", line)
vals = [float(val.replace("D", "E")) for val in vals]
for jval, val in enumerate(vals):
j = j_indices[jval]
self.hessian[i, j] = val
self.hessian[j, i] = val
jndf += len(vals)
line = f.readline()
j_indices = [j + 5 for j in j_indices]
self._parse_hessian(file, (input_structures or std_structures)[0])

elif parse_bond_order:
# parse Wiberg bond order
line = f.readline()
line = f.readline()
nat = len(input_structures[0])
line = file.readline()
line = file.readline()
n_atoms = len(input_structures[0])
matrix = []
for _ in range(nat):
line = f.readline()
for _ in range(n_atoms):
line = file.readline()
matrix.append([float(v) for v in line.split()[2:]])

self.bond_orders = {}
for iat in range(nat):
for jat in range(iat + 1, nat):
self.bond_orders[(iat, jat)] = matrix[iat][jat]
for atom_idx in range(n_atoms):
for atom_jdx in range(atom_idx + 1, n_atoms):
self.bond_orders[(atom_idx, atom_jdx)] = matrix[atom_idx][atom_jdx]
parse_bond_order = False

elif termination_patt.search(line):
Expand Down Expand Up @@ -1063,7 +1047,7 @@ def _parse(self, filename):
geom_orientation = "input"
read_coord = True
elif "Optimization completed." in line:
line = f.readline()
line = file.readline()
if " -- Stationary point found." not in line:
warnings.warn(
f"\n{self.filename}: Optimization complete but this is not a stationary point"
Expand All @@ -1082,7 +1066,7 @@ def _parse(self, filename):
parse_forces = True
elif freq_on_patt.search(line):
parse_freq = True
[f.readline() for i in range(3)]
[file.readline() for i in range(3)]
elif mo_coeff_patt.search(line):
if "Alpha" in line:
self.is_spin = True
Expand All @@ -1093,7 +1077,7 @@ def _parse(self, filename):
resume = []
while not resume_end_patt.search(line):
resume.append(line)
line = f.readline()
line = file.readline()
# security if \\@ not in one line !
if line == "\n":
break
Expand Down Expand Up @@ -1129,7 +1113,36 @@ def _parse(self, filename):
self.opt_structures = opt_structures

if not terminated:
warnings.warn("\n" + self.filename + ": Termination error or bad Gaussian output file !")
warnings.warn(f"\n{self.filename}: Termination error or bad Gaussian output file !")

def _parse_hessian(self, file, structure):
"""
Parse the hessian matrix in the output file.
Args:
file: file object
structure: structure in the output file
"""
# read Hessian matrix under "Force constants in Cartesian coordinates"
# Hessian matrix is in the input orientation framework
# WARNING : need #P in the route line

ndf = 3 * len(structure)
self.hessian = np.zeros((ndf, ndf))
j_indices = range(5)
ndf_idx = 0
while ndf_idx < ndf:
for i in range(ndf_idx, ndf):
line = file.readline()
vals = re.findall(r"\s*([+-]?\d+\.\d+[eEdD]?[+-]\d+)", line)
vals = [float(val.replace("D", "E")) for val in vals]
for val_idx, val in enumerate(vals):
j = j_indices[val_idx]
self.hessian[i, j] = val
self.hessian[j, i] = val
ndf_idx += len(vals)
line = file.readline()
j_indices = [j + 5 for j in j_indices]

def _check_pcm(self, line):
energy_patt = re.compile(r"(Dispersion|Cavitation|Repulsion) energy\s+\S+\s+=\s+(\S*)")
Expand Down
4 changes: 2 additions & 2 deletions tests/files/.pytest-split-durations
Original file line number Diff line number Diff line change
Expand Up @@ -1921,7 +1921,7 @@
"tests/io/test_gaussian.py::TestGaussianInput::test_from_string": 0.0007665419252589345,
"tests/io/test_gaussian.py::TestGaussianInput::test_gen_basis": 0.00042866700096055865,
"tests/io/test_gaussian.py::TestGaussianInput::test_init": 0.00040841702139005065,
"tests/io/test_gaussian.py::TestGaussianInput::test_multiple_paramaters": 0.0015059159486554563,
"tests/io/test_gaussian.py::TestGaussianInput::test_multiple_parameters": 0.0015059159486554563,
"tests/io/test_gaussian.py::TestGaussianInput::test_no_molecule": 0.00032249902142211795,
"tests/io/test_gaussian.py::TestGaussianInput::test_no_molecule_func_bset_charge_mult": 0.00029816600726917386,
"tests/io/test_gaussian.py::TestGaussianInput::test_str_and_from_string": 0.0006575410370714962,
Expand Down Expand Up @@ -2546,4 +2546,4 @@
"tests/util/test_typing.py::test_species_like": 0.000255541002843529,
"tests/vis/test_plotters.py::TestSpectrumPlotter::test_get_plot": 0.06307895801728591,
"tests/vis/test_plotters.py::TestSpectrumPlotter::test_get_stacked_plot": 0.14057258295360953
}
}
18 changes: 9 additions & 9 deletions tests/io/test_gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@
class TestGaussianInput(unittest.TestCase):
def setUp(self):
coords = [
[0.000000, 0.000000, 0.000000],
[0.000000, 0.000000, 1.089000],
[1.026719, 0.000000, -0.363000],
[-0.513360, -0.889165, -0.363000],
[-0.513360, 0.889165, -0.363000],
[0, 0, 0],
[0, 0, 1.089],
[1.026719, 0, -0.363],
[-0.513360, -0.889165, -0.363],
[-0.513360, 0.889165, -0.363],
]
self.coords = coords
mol = Molecule(["C", "H", "H", "H", "H"], coords)
Expand Down Expand Up @@ -197,7 +197,7 @@ def test_gen_basis(self):
)
assert gau.to_str(cart_coords=False) == gau_str

def test_multiple_paramaters(self):
def test_multiple_parameters(self):
"""
This test makes sure that input files with multi-parameter keywords
and route cards with multiple lines can be parsed accurately.
Expand Down Expand Up @@ -255,10 +255,10 @@ class TestGaussianOutput(unittest.TestCase):
# TODO: Add unittest for PCM type output.

def setUp(self):
self.gauout = GaussianOutput(f"{test_dir}/methane.log")
self.gau_out = GaussianOutput(f"{test_dir}/methane.log")

def test_resume(self):
resume = self.gauout.resumes[0]
resume = self.gau_out.resumes[0]
methane_resume = r"""1\1\GINC-SHYUE-LAPTOP\FOpt\RHF\3-21G\C1H4\SHYUE\27-Feb-2008\0\\#p hf/3
-21G opt\\Title Card Required\\0,1\C,0.,0.,0.\H,0.,0.,1.0829014152\H,1
.0209692454,0.,-0.3609671384\H,-0.5104846227,-0.884185303,-0.360967138
Expand All @@ -270,7 +270,7 @@ def test_resume(self):
assert resume == methane_resume

def test_props(self):
gau = self.gauout
gau = self.gau_out
assert len(gau.energies) == 3
assert gau.energies[-1] == approx(-39.9768775602)
assert len(gau.structures) == 4
Expand Down

0 comments on commit 173a65b

Please sign in to comment.