Skip to content

Commit

Permalink
A fixed molecular charge for the molecule generation. (#73)
Browse files Browse the repository at this point in the history
* A new function to set a fixed charge value

Signed-off-by: Jonathan Schöps <s6jtscho@uni-bonn.de>

* latest changes to the fixed charge method

Signed-off-by: Jonathan Schöps <s6jtscho@uni-bonn.de>

* Fixed charge and element composition works test have to be added

Signed-off-by: Jonathan Schöps <s6jtscho@uni-bonn.de>

* More modular fixed charge routine

Signed-off-by: Jonathan Schöps <s6jtscho@uni-bonn.de>

* complete fixed charge routine

Signed-off-by: Jonathan Schöps <s6jtscho@uni-bonn.de>

* Removed an unnessesary import

Signed-off-by: Jonathan Schöps <s6jtscho@uni-bonn.de>

* more convenient function name

Signed-off-by: Jonathan Schöps <s6jtscho@uni-bonn.de>

* Worked in the suggested changes

Signed-off-by: Jonathan Schöps <s6jtscho@uni-bonn.de>

* Added a changelog entry

Signed-off-by: Jonathan Schöps <s6jtscho@uni-bonn.de>

* Changes a bug with an array type

Signed-off-by: Jonathan Schöps <s6jtscho@uni-bonn.de>

* removed left over print statements

Signed-off-by: Jonathan Schöps <s6jtscho@uni-bonn.de>

* Implemented requested changes

Signed-off-by: Jonathan Schöps <s6jtscho@uni-bonn.de>

* Added a new test to raise a ValueError

Signed-off-by: Jonathan Schöps <s6jtscho@uni-bonn.de>

---------

Signed-off-by: Jonathan Schöps <s6jtscho@uni-bonn.de>
  • Loading branch information
jonathan-schoeps authored Nov 15, 2024
1 parent 92399dc commit 82aa3f5
Show file tree
Hide file tree
Showing 13 changed files with 365 additions and 41 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- function which is able to printout the xyz coordinates to the terminal similar to the `.xyz` layout
- elements 87 to 103 are accessible via the element composition. If `xtb` is the engine, the elements will be replaced by their lighter homologues.
- support for `python-3.13`
- option to set a fixed molecular charge, while ensuring `uhf = 0`

### Breaking Changes
- Removal of the `dist_threshold` flag and in the `-toml` file.
Expand Down
2 changes: 2 additions & 0 deletions mindlessgen.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ element_composition = "C:2-3, H:1-2, O:1-2, N:1-*"
# > CAUTION: This option is overridden by the 'element_composition' option.
# > I.e., if an element is specified in 'element_composition' with an occurrence > 0, it will be added to the molecule anyway.
forbidden_elements = "57-71, 81-*"
# > Set a charge for the molecule generation. Options: "none" (random charge assignment), "int" or <int> (fixed charge assignment)
molecular_charge = "none"

[refine]
# > Maximum number of fragment optimization cycles. Options: <int>
Expand Down
7 changes: 7 additions & 0 deletions src/mindlessgen/cli/cli_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,12 @@ def cli_parser(argv: Sequence[str] | None = None) -> dict:
required=False,
help="Contract the coordinates of the molecule after the coordinats generation.",
)
parser.add_argument(
"--molecular-charge",
type=str,
required=False,
help="Define the charge of the molecule.",
)

### Refinement arguments ###
parser.add_argument(
Expand Down Expand Up @@ -280,6 +286,7 @@ def cli_parser(argv: Sequence[str] | None = None) -> dict:
"scale_fragment_detection": args_dict["scale_fragment_detection"],
"scale_minimal_distance": args_dict["scale_minimal_distance"],
"contract_coords": args_dict["contract_coords"],
"molecular_charge": args_dict["molecular_charge"],
}
# XTB specific arguments
rev_args_dict["xtb"] = {"xtb_path": args_dict["xtb_path"]}
Expand Down
6 changes: 6 additions & 0 deletions src/mindlessgen/generator/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,12 @@ def single_molecule_generator(
print(e)
stop_event.set()
return None
except RuntimeError as e:
if config.general.verbosity > 0:
print(f"Generation failed for cycle {cycle + 1}.")
if config.general.verbosity > 1:
print(e)
return None

try:
# ____ _ _ _
Expand Down
96 changes: 94 additions & 2 deletions src/mindlessgen/molecules/generate_molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,15 @@
from .refinement import get_cov_radii, COV_RADII
from .miscellaneous import (
set_random_charge,
calculate_protons,
get_alkali_metals,
get_alkaline_earth_metals,
get_three_d_metals,
get_four_d_metals,
get_five_d_metals,
get_lanthanides,
get_actinides,
calculate_ligand_electrons,
)


Expand Down Expand Up @@ -47,9 +50,12 @@ def generate_random_molecule(
ati=mol.ati,
scale_minimal_distance=config_generate.scale_minimal_distance,
)
mol.charge, mol.uhf = set_random_charge(mol.ati, verbosity)
if config_generate.molecular_charge is not None:
mol.charge = config_generate.molecular_charge
mol.uhf = 0
else:
mol.charge, mol.uhf = set_random_charge(mol.ati, verbosity)
mol.set_name_from_formula()

if verbosity > 1:
print(mol)

Expand Down Expand Up @@ -135,6 +141,22 @@ def generate_atom_list(cfg: GenerateConfig, verbosity: int = 1) -> np.ndarray:
+ "Setting the number of atoms for the fixed composition. "
+ f"Returning: \n{natoms}\n"
)
# If the molecular charge is defined, and a fixed element composition is defined, check if the electrons are even. If not raise an error.
if cfg.molecular_charge:
protons = calculate_protons(natoms)
nel = protons - cfg.molecular_charge
f_elem = any(
count > 0 and (i in get_lanthanides() or i in get_actinides())
for i, count in enumerate(natoms)
)
if (f_elem and calculate_ligand_electrons(natoms, nel) % 2 != 0) or (
not f_elem and nel % 2 != 0
):
raise SystemExit(
"Both fixed charge and fixed composition are defined. "
+ "Please only define one of them."
+ "Or ensure that the fixed composition is closed shell."
)
return natoms

# Reasoning for the parameters in the following sections:
Expand Down Expand Up @@ -324,6 +346,11 @@ def check_composition():
check_composition()
# Check if the number of atoms is within the defined limits
check_min_max_atoms()
# If the molecule is not closed shell, add an atom to ensure a closed shell system
if cfg.molecular_charge is not None:
protons = calculate_protons(natoms)
nel = protons - cfg.molecular_charge
natoms = fixed_charge_correction(cfg, natoms, nel, valid_elems, verbosity)
### ACTUAL WORKFLOW END ###

return natoms
Expand Down Expand Up @@ -431,3 +458,68 @@ def check_distances(
if r < scale_minimal_distance * sum_radii:
return False
return True


def fixed_charge_correction(
cfg: GenerateConfig,
natoms: np.ndarray,
nel: int,
valid_elems: set[int],
verbosity: int,
) -> np.ndarray:
"""
Correct the number of electrons if a fixed charge is given and the molecule is not closed shell.
"""
f_elem = any(
count > 0 and (i in get_lanthanides() or i in get_actinides())
for i, count in enumerate(natoms)
)
if f_elem:
ligand_electrons = calculate_ligand_electrons(natoms, nel)
# If f block elements are included, correct only if the remaning ligand protons are uneven
if ligand_electrons % 2 != 0:
natoms = fixed_charge_elem_correction(cfg, natoms, valid_elems, verbosity)
return natoms
# If f block elements are not included, correct if the number of electrons is uneven
elif nel % 2 != 0:
natoms = fixed_charge_elem_correction(cfg, natoms, valid_elems, verbosity)
return natoms
return natoms


def fixed_charge_elem_correction(
cfg: GenerateConfig,
natoms: np.ndarray,
valid_elems: set[int],
verbosity: int,
) -> np.ndarray:
"""
Correct the number of atoms if the number of electrons is odd and a molecular charge is set.
"""
num_atoms = np.sum(natoms)
# All other elements
rng = np.random.default_rng()
odd_atoms = np.array([elem for elem in valid_elems if elem % 2 == 0], dtype=int)
random_odd_atoms = rng.permutation(odd_atoms)
if 0 in valid_elems:
random_odd_atoms = np.insert(random_odd_atoms, 0, 0)
for random_elem in random_odd_atoms:
min_count, max_count = cfg.element_composition.get(
random_elem, (0, cfg.max_num_atoms)
)
if min_count is None:
min_count = 0
if max_count is None:
max_count = cfg.max_num_atoms
# Check if adding or removing the random element is possible
if natoms[random_elem] < max_count and num_atoms < cfg.max_num_atoms:
natoms[random_elem] += 1
if verbosity > 1:
print(f"Adding atom type {random_elem} for charge...")
return natoms
elif natoms[random_elem] > min_count and num_atoms > cfg.min_num_atoms:
natoms[random_elem] -= 1
if verbosity > 1:
print(f"Removing atom type {random_elem} for charge...")
return natoms
raise RuntimeError("Could not correct the odd number of electrons.")
32 changes: 31 additions & 1 deletion src/mindlessgen/molecules/miscellaneous.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
import numpy as np


def set_random_charge(ati: np.ndarray, verbosity: int = 1) -> tuple[int, int]:
def set_random_charge(
ati: np.ndarray,
verbosity: int = 1,
) -> tuple[int, int]:
"""
Set the charge of a molecule so that unpaired electrons are avoided.
"""
Expand Down Expand Up @@ -51,6 +54,7 @@ def set_random_charge(ati: np.ndarray, verbosity: int = 1) -> tuple[int, int]:
print(
f"Number of protons from ligands (assuming negative charge): {ligand_protons}"
)

if ligand_protons % 2 == 0:
charge = 0
else:
Expand Down Expand Up @@ -81,6 +85,32 @@ def set_random_charge(ati: np.ndarray, verbosity: int = 1) -> tuple[int, int]:
return charge, uhf


def calculate_protons(natoms: np.ndarray) -> int:
"""
Calculate the number of protons in a molecule from the atom list.
"""
protons = 0
for i, atom_count in enumerate(natoms):
protons += atom_count * (i + 1)
return protons


def calculate_ligand_electrons(natoms: np.ndarray, nel: int) -> int:
"""
Calculate the number of ligand electrons in a molecule if lanthanides or actinides are within the molecule.
"""
f_electrons = sum(
occurrence
* (
ati - 2
) # subtract 3 to get the number of electrons for an Ln3+ (Ac3+) ion and add 1 to account for the 0 based indexing.
for ati, occurrence in enumerate(natoms)
if (ati in get_lanthanides() or ati in get_actinides())
)
ligand_electrons = nel - f_electrons
return ligand_electrons


def get_alkali_metals() -> list[int]:
"""
Get the atomic numbers of alkali metals.
Expand Down
42 changes: 36 additions & 6 deletions src/mindlessgen/molecules/refinement.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,13 @@
from ..qm.base import QMMethod
from ..prog import GenerateConfig, RefineConfig
from .molecule import Molecule
from .miscellaneous import set_random_charge
from .miscellaneous import (
set_random_charge,
calculate_protons,
calculate_ligand_electrons,
get_lanthanides,
get_actinides,
)

COV_RADII = "pyykko"
BOHR2AA = (
Expand Down Expand Up @@ -50,6 +56,7 @@ def iterative_optimization(
# Detect fragments from the optimized molecule
fragmols = detect_fragments(
mol=rev_mol,
molecular_charge=config_generate.molecular_charge,
vdw_scaling=config_generate.scale_fragment_detection,
verbosity=verbosity,
)
Expand Down Expand Up @@ -109,7 +116,23 @@ def iterative_optimization(
f"Element {ati} is overrepresented "
+ f"in the largest fragment in cycle {cycle + 1}."
)

if config_generate.molecular_charge is not None:
protons = calculate_protons(fragmols[0].atlist)
nel = protons - config_generate.molecular_charge
f_elem = any(
count > 0 and (i in get_lanthanides() or i in get_actinides())
for i, count in enumerate(fragmols[0].atlist)
)
if f_elem:
ligand_electrons = calculate_ligand_electrons(fragmols[0].atlist, nel)
if ligand_electrons % 2 != 0:
raise RuntimeError(
f"Number of electrons in the largest fragment in cycle {cycle + 1} is odd."
)
elif nel % 2 != 0:
raise RuntimeError(
f"Number of electrons in the largest fragment in cycle {cycle + 1} is odd."
)
rev_mol = fragmols[
0
] # Set current_mol to the first fragment for the next cycle
Expand All @@ -136,7 +159,10 @@ def iterative_optimization(


def detect_fragments(
mol: Molecule, vdw_scaling: float, verbosity: int = 1
mol: Molecule,
molecular_charge: int,
vdw_scaling: float,
verbosity: int = 1,
) -> list[Molecule]:
"""
Detects fragments in a molecular system based on atom positions and covalent radii.
Expand Down Expand Up @@ -201,9 +227,13 @@ def detect_fragments(
for atom in fragment_molecule.ati:
fragment_molecule.atlist[atom] += 1
# Update the charge of the fragment molecule
fragment_molecule.charge, fragment_molecule.uhf = set_random_charge(
fragment_molecule.ati, verbosity
)
if molecular_charge is not None:
fragment_molecule.charge = molecular_charge
fragment_molecule.uhf = 0
else:
fragment_molecule.charge, fragment_molecule.uhf = set_random_charge(
fragment_molecule.ati, verbosity
)
fragment_molecule.set_name_from_formula()
if verbosity > 1:
print(f"Fragment molecule: {fragment_molecule}")
Expand Down
23 changes: 23 additions & 0 deletions src/mindlessgen/prog/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ def __init__(self: GenerateConfig) -> None:
self._scale_fragment_detection: float = 1.25
self._scale_minimal_distance: float = 0.8
self._contract_coords: bool = True
self._molecular_charge: int | None = None

def get_identifier(self) -> str:
return "generate"
Expand Down Expand Up @@ -402,6 +403,28 @@ def contract_coords(self, contract_coords: bool):
raise TypeError("Contract coords should be a boolean.")
self._contract_coords = contract_coords

@property
def molecular_charge(self):
"""
Get the molecular_charge.
"""
return self._molecular_charge

@molecular_charge.setter
def molecular_charge(self, molecular_charge: str | int):
"""
Set the molecular_charge.
"""
if isinstance(molecular_charge, str):
if molecular_charge.lower() == "none" or molecular_charge == "":
self._molecular_charge = None
else:
self._molecular_charge = int(molecular_charge)
elif isinstance(molecular_charge, int):
self._molecular_charge = molecular_charge
else:
raise TypeError("Molecular charge should be a string or an integer.")


class RefineConfig(BaseConfig):
"""
Expand Down
Loading

0 comments on commit 82aa3f5

Please sign in to comment.