Add prepare_segmented function

theochem · Jul 11, 2024 · 8708ad3 · 8708ad3
1 parent cdb30cc
commit 8708ad3
Show file tree

Hide file tree

Showing 19 changed files with 446 additions and 170 deletions.
diff --git a/docs/example_scripts/convert_fchk_molden.py b/docs/example_scripts/convert_fchk_molden.py
@@ -5,4 +5,4 @@
 mol = load_one("water.fchk")
 # Here you may put some code to manipulate mol before writing it the data
 # to a different file.
-dump_one(mol, "water.molden")
+dump_one(mol, "water.molden", allow_changes=True)
diff --git a/iodata/basis.py b/iodata/basis.py
@@ -239,13 +239,3 @@ class MolecularBasis:
     def nbasis(self) -> int:
         """Number of basis functions."""
         return sum(shell.nbasis for shell in self.shells)
-
-    def get_segmented(self):
-        """Unroll generalized contractions."""
-        shells = []
-        for shell in self.shells:
-            for angmom, kind, coeffs in zip(shell.angmoms, shell.kinds, shell.coeffs.T):
-                shells.append(
-                    Shell(shell.icenter, [angmom], [kind], shell.exponents, coeffs.reshape(-1, 1))
-                )
-        return attrs.evolve(self, shells=shells)
diff --git a/iodata/convert.py b/iodata/convert.py
@@ -23,18 +23,20 @@
 Most of them are used by the prepare module.
 """
 
+import attrs
 import numpy as np
 from numpy.typing import NDArray
 
-from .basis import MolecularBasis
+from .basis import MolecularBasis, Shell
 from .orbitals import MolecularOrbitals
 
 __all__ = (
-    "convert_to_unrestricted",
     "convert_conventions",
     "iter_cart_alphabet",
     "HORTON2_CONVENTIONS",
     "CCA_CONVENTIONS",
+    "convert_to_unrestricted",
+    "convert_to_segmented",
 )
 
 
@@ -253,3 +255,31 @@ def convert_to_unrestricted(mo: MolecularOrbitals) -> MolecularOrbitals:
         None if mo.energies is None else np.concatenate([mo.energies, mo.energies]),
         None if mo.irreps is None else np.concatenate([mo.irreps, mo.irreps]),
     )
+
+
+def convert_to_segmented(obasis: MolecularBasis, keep_sp: bool = False) -> MolecularBasis:
+    """Convert basis with generalized contractions to one with only single contractions.
+
+    Parameters
+    ----------
+    obasis
+        The basis set to convert.
+    keep_sp
+        If True, SP shells are not split up.
+        This can be useful for file formats only support
+        SP-generalized contractions and no other ones.
+
+    Returns
+    -------
+    A new ``MolecularBasis`` instance with separate single contractions.
+    """
+    shells = []
+    for shell in obasis.shells:
+        if (shell.ncon == 1) or (keep_sp and shell.ncon == 2 and (shell.angmoms == [0, 1]).all()):
+            shells.append(shell)
+        else:
+            for angmom, kind, coeffs in zip(shell.angmoms, shell.kinds, shell.coeffs.T):
+                shells.append(
+                    Shell(shell.icenter, [angmom], [kind], shell.exponents, coeffs.reshape(-1, 1))
+                )
+    return attrs.evolve(obasis, shells=shells)
diff --git a/iodata/formats/fchk.py b/iodata/formats/fchk.py
@@ -31,7 +31,8 @@
 from ..docstrings import document_dump_one, document_load_many, document_load_one
 from ..iodata import IOData
 from ..orbitals import MolecularOrbitals
-from ..utils import DumpError, LineIterator, LoadError, LoadWarning, PrepareDumpError, amu
+from ..prepare import prepare_segmented
+from ..utils import LineIterator, LoadError, LoadWarning, PrepareDumpError, amu
 
 __all__ = ()
 
@@ -592,7 +593,7 @@ def prepare_dump(data: IOData, allow_changes: bool, filename: str) -> IOData:
                 "followed by fully virtual ones.",
                 filename,
             )
-    return data
+    return prepare_segmented(data, True, allow_changes, filename, "FCHK")
 
 
 @document_dump_one(
@@ -671,7 +672,10 @@ def dump_one(f: TextIO, data: IOData):
             elif shell.ncon == 2 and (shell.angmoms == [0, 1]).all():
                 shell_types.append(-1)
             else:
-                raise DumpError("Cannot identify type of shell!", f)
+                raise RuntimeError(
+                    "Generalized contractions other than SP are not supported. "
+                    "Call prepare_dump first."
+                )
 
         num_pure_d_shells = sum([1 for st in shell_types if st == 2])
         num_pure_f_shells = sum([1 for st in shell_types if st == 3])

diff --git a/iodata/formats/molden.py b/iodata/formats/molden.py
@@ -40,7 +40,7 @@
 from ..orbitals import MolecularOrbitals
 from ..overlap import compute_overlap, gob_cart_normalization
 from ..periodic import num2sym, sym2num
-from ..prepare import prepare_unrestricted_aminusb
+from ..prepare import prepare_segmented, prepare_unrestricted_aminusb
 from ..utils import DumpError, LineIterator, LoadError, LoadWarning, PrepareDumpError, angstrom
 
 __all__ = ()
@@ -793,7 +793,8 @@ def prepare_dump(data: IOData, allow_changes: bool, filename: str) -> IOData:
         raise PrepareDumpError("The Molden format requires an orbital basis set.", filename)
     if data.mo.kind == "generalized":
         raise PrepareDumpError("Cannot write Molden file with generalized orbitals.", filename)
-    return prepare_unrestricted_aminusb(data, allow_changes, filename, "Molden")
+    data = prepare_unrestricted_aminusb(data, allow_changes, filename, "Molden")
+    return prepare_segmented(data, False, allow_changes, filename, "Molden")
 
 
 @document_dump_one("Molden", ["atcoords", "atnums", "mo", "obasis"], ["atcorenums", "title"])
@@ -826,16 +827,19 @@ def dump_one(f: TextIO, data: IOData):
     # to be relevant. If it happens, an error is raised.
     angmom_kinds = {}
     for shell in obasis.shells:
-        for angmom, kind in zip(shell.angmoms, shell.kinds):
-            if angmom in angmom_kinds:
-                if kind != angmom_kinds[angmom]:
-                    raise DumpError(
-                        "Molden format does not support mixed pure+Cartesian functions for one "
-                        "angular momentum.",
-                        f,
-                    )
-            else:
-                angmom_kinds[angmom] = kind
+        if shell.ncon != 1:
+            raise RuntimeError("Generalized contractions not supported. Call prepare_dump first.")
+        kind = shell.kinds[0]
+        angmom = shell.angmoms[0]
+        if angmom in angmom_kinds:
+            if kind != angmom_kinds[angmom]:
+                raise DumpError(
+                    "Molden format does not support mixed pure+Cartesian functions for one "
+                    "angular momentum.",
+                    f,
+                )
+        else:
+            angmom_kinds[angmom] = kind
 
     # Fill in some defaults (Cartesian) for angmom kinds if needed.
     angmom_kinds.setdefault(2, "c")
@@ -863,12 +867,12 @@ def dump_one(f: TextIO, data: IOData):
                 f.write("\n")
             last_icenter = shell.icenter
             f.write("%3i 0\n" % (shell.icenter + 1))
-        # Write out as a segmented basis. Molden format does not support
-        # generalized contractions.
-        for iangmom, angmom in enumerate(shell.angmoms):
-            f.write(f" {angmom_its(angmom):1s}  {shell.nexp:3d} 1.00\n")
-            for exponent, coeff in zip(shell.exponents, shell.coeffs[:, iangmom]):
-                f.write(f"{exponent:20.10f} {coeff:20.10f}\n")
+        # Write out the basis.
+        # It is guaranteed to be segmented when reaching this part of the code.
+        angmom = shell.angmoms[0]
+        f.write(f" {angmom_its(angmom):1s}  {shell.nexp:3d} 1.00\n")
+        for exponent, coeff in zip(shell.exponents, shell.coeffs[:, 0]):
+            f.write(f"{exponent:20.10f} {coeff:20.10f}\n")
     f.write("\n")
 
     # Get the permutation to convert the orbital coefficients to Molden conventions.
@@ -913,7 +917,7 @@ def dump_one(f: TextIO, data: IOData):
             irreps,
         )
     else:
-        raise RuntimeError("This should not happen because of prepare_dump")
+        raise RuntimeError("Generalized orbitals are not support. Call prepare_dump first.")
 
 
 def _dump_helper_orb(f, spin, occs, coeffs, energies, irreps):

diff --git a/iodata/formats/molekel.py b/iodata/formats/molekel.py
@@ -34,7 +34,7 @@
 from ..docstrings import document_dump_one, document_load_one
 from ..iodata import IOData
 from ..orbitals import MolecularOrbitals
-from ..prepare import prepare_unrestricted_aminusb
+from ..prepare import prepare_segmented, prepare_unrestricted_aminusb
 from ..utils import DumpError, LineIterator, LoadError, LoadWarning, PrepareDumpError, angstrom
 from .molden import CONVENTIONS, _fix_molden_from_buggy_codes
 
@@ -308,7 +308,8 @@ def prepare_dump(data: IOData, allow_changes: bool, filename: str) -> IOData:
         raise PrepareDumpError("The Molekel format requires an orbital basis set.", filename)
     if data.mo.kind == "generalized":
         raise PrepareDumpError("Cannot write Molekel file with generalized orbitals.", filename)
-    return prepare_unrestricted_aminusb(data, allow_changes, filename, "Molekel")
+    data = prepare_unrestricted_aminusb(data, allow_changes, filename, "Molekel")
+    return prepare_segmented(data, False, allow_changes, filename, "Molekel")
 
 
 @document_dump_one("Molekel", ["atcoords", "atnums", "mo", "obasis"], ["atcharges"])
@@ -346,15 +347,18 @@ def dump_one(f: TextIO, data: IOData):
     f.write("$BASIS\n")
     iatom_last = 0
     for shell in data.obasis.shells:
+        if shell.ncon != 1:
+            raise RuntimeError("Generalized contractions not supported. Call prepare_dump first.")
         iatom_new = shell.icenter
         if iatom_new != iatom_last:
             f.write("$$\n")
-        for iangmom, (angmom, kind) in enumerate(zip(shell.angmoms, shell.kinds)):
-            iatom_last = shell.icenter
-            nbasis = len(CONVENTIONS[(angmom, kind)])
-            f.write(f" {nbasis} {angmom_its(angmom).capitalize():1s} 1.00\n")
-            for exponent, coeff in zip(shell.exponents, shell.coeffs[:, iangmom]):
-                f.write(f"{exponent:20.10f} {coeff:17.10f}\n")
+        angmom = shell.angmoms[0]
+        kind = shell.kinds[0]
+        iatom_last = shell.icenter
+        nbasis = len(CONVENTIONS[(angmom, kind)])
+        f.write(f" {nbasis} {angmom_its(angmom).capitalize():1s} 1.00\n")
+        for exponent, coeff in zip(shell.exponents, shell.coeffs[:, 0]):
+            f.write(f"{exponent:20.10f} {coeff:17.10f}\n")
     f.write("\n")
     f.write("$END\n")
     f.write("\n")
@@ -388,7 +392,7 @@ def dump_one(f: TextIO, data: IOData):
         _dump_helper_occ(f, data, spin="b")
 
     else:
-        raise RuntimeError("This should not happen because of prepare_dump")
+        raise RuntimeError("Generalized orbitals are not support. Call prepare_dump first.")
 
 
 # Defining help dumping functions

diff --git a/iodata/formats/wfn.py b/iodata/formats/wfn.py
@@ -39,7 +39,7 @@
 from ..orbitals import MolecularOrbitals
 from ..overlap import gob_cart_normalization
 from ..periodic import num2sym, sym2num
-from ..prepare import prepare_unrestricted_aminusb
+from ..prepare import prepare_segmented, prepare_unrestricted_aminusb
 from ..utils import LineIterator, LoadError, PrepareDumpError
 
 __all__ = ()
@@ -537,7 +537,8 @@ def prepare_dump(data: IOData, allow_changes: bool, filename: str) -> IOData:
             raise PrepareDumpError(
                 "The WFN format only supports Cartesian MolecularBasis.", filename
             )
-    return prepare_unrestricted_aminusb(data, allow_changes, filename, "WFN")
+    data = prepare_unrestricted_aminusb(data, allow_changes, filename, "WFN")
+    return prepare_segmented(data, False, allow_changes, filename, "WFN")
 
 
 @document_dump_one(
@@ -550,13 +551,13 @@ def dump_one(f: TextIO, data: IOData) -> None:
     # get shells for the de-contracted basis
     shells = []
     for shell in data.obasis.shells:
-        for i, (angmom, kind) in enumerate(zip(shell.angmoms, shell.kinds)):
-            for exponent, coeff in zip(shell.exponents, shell.coeffs.T[i]):
-                shells.append(
-                    Shell(
-                        shell.icenter, [angmom], [kind], np.array([exponent]), coeff.reshape(-1, 1)
-                    )
-                )
+        if shell.ncon != 1:
+            raise RuntimeError("Generalized contractions not supported. Call prepare_dump first.")
+        # Decontract the shell
+        angmom = shell.angmoms[0]
+        kind = shell.kinds[0]
+        for exponent, coeff in zip(shell.exponents, shell.coeffs[:, 0]):
+            shells.append(Shell(shell.icenter, [angmom], [kind], [exponent], [[coeff]]))
     # make a new instance of MolecularBasis with de-contracted basis shells; ideally for WFN we
     # want the primitive basis set, but IOData only supports shells.
     obasis = MolecularBasis(shells, data.obasis.conventions, data.obasis.primitive_normalization)

diff --git a/iodata/formats/wfx.py b/iodata/formats/wfx.py
@@ -33,7 +33,7 @@
 from ..iodata import IOData
 from ..orbitals import MolecularOrbitals
 from ..periodic import num2sym
-from ..prepare import prepare_unrestricted_aminusb
+from ..prepare import prepare_segmented, prepare_unrestricted_aminusb
 from ..utils import LineIterator, LoadError, LoadWarning, PrepareDumpError
 from .wfn import CONVENTIONS, build_obasis, get_mocoeff_scales
 
@@ -369,7 +369,8 @@ def prepare_dump(data: IOData, allow_changes: bool, filename: str) -> IOData:
             raise PrepareDumpError(
                 "The WFX format only supports Cartesian MolecularBasis.", filename
             )
-    return prepare_unrestricted_aminusb(data, allow_changes, filename, "WFX")
+    data = prepare_unrestricted_aminusb(data, allow_changes, filename, "WFX")
+    return prepare_segmented(data, False, allow_changes, filename, "WFX")
 
 
 @document_dump_one(
@@ -390,13 +391,13 @@ def dump_one(f: TextIO, data: IOData):
     # get shells for the de-contracted basis
     shells = []
     for shell in data.obasis.shells:
-        for i, (angmom, kind) in enumerate(zip(shell.angmoms, shell.kinds)):
-            for exponent, coeff in zip(shell.exponents, shell.coeffs.T[i]):
-                shells.append(
-                    Shell(
-                        shell.icenter, [angmom], [kind], np.array([exponent]), coeff.reshape(-1, 1)
-                    )
-                )
+        if shell.ncon != 1:
+            raise RuntimeError("Generalized contractions not supported. Call prepare_dump first.")
+        # Decontract the shell
+        angmom = shell.angmoms[0]
+        kind = shell.kinds[0]
+        for exponent, coeff in zip(shell.exponents, shell.coeffs[:, 0]):
+            shells.append(Shell(shell.icenter, [angmom], [kind], [exponent], [[coeff]]))
     # make a new instance of MolecularBasis with de-contracted basis shells; ideally for WFX we
     # want the primitive basis set, but IOData only supports shells.
     obasis = MolecularBasis(shells, data.obasis.conventions, data.obasis.primitive_normalization)

diff --git a/iodata/overlap.py b/iodata/overlap.py
@@ -27,7 +27,7 @@
 
 from .basis import MolecularBasis, Shell
 from .convert import HORTON2_CONVENTIONS as OVERLAP_CONVENTIONS
-from .convert import convert_conventions, iter_cart_alphabet
+from .convert import convert_conventions, convert_to_segmented, iter_cart_alphabet
 from .overlap_cartpure import tfs
 
 __all__ = ("OVERLAP_CONVENTIONS", "compute_overlap", "gob_cart_normalization")
@@ -105,7 +105,7 @@ def compute_overlap(
         raise ValueError("The overlap integrals are only implemented for L2 normalization.")
 
     # Get a segmented basis, for simplicity
-    obasis0 = obasis0.get_segmented()
+    obasis0 = convert_to_segmented(obasis0)
 
     # Handle optional arguments
     if obasis1 is None:
@@ -126,7 +126,7 @@ def compute_overlap(
                 "array of atomic coordinates is expected."
             )
         # Get a segmented basis, for simplicity
-        obasis1 = obasis1.get_segmented()
+        obasis1 = convert_to_segmented(obasis1)
         identical = False
 
     # Initialize result