Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

A fixed molecular charge for the molecule generation. #73

Merged
merged 17 commits into from
Nov 15, 2024
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- function which is able to printout the xyz coordinates to the terminal similar to the `.xyz` layout
- elements 87 to 103 are accessible via the element composition. If `xtb` is the engine, the elements will be replaced by their lighter homologues.
- support for `python-3.13`
- option to set a fixed molecular charge, while ensuring `uhf = 0`

### Breaking Changes
- Removal of the `dist_threshold` flag and in the `-toml` file.
Expand Down
2 changes: 2 additions & 0 deletions mindlessgen.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ element_composition = "C:2-3, H:1-2, O:1-2, N:1-*"
# > CAUTION: This option is overridden by the 'element_composition' option.
# > I.e., if an element is specified in 'element_composition' with an occurrence > 0, it will be added to the molecule anyway.
forbidden_elements = "57-71, 81-*"
# > Set a charge for the molecule generation. Options: "none" or "" (turn it off), "int" or <int>
jonathan-schoeps marked this conversation as resolved.
Show resolved Hide resolved
molecular_charge = "none"

[refine]
# > Maximum number of fragment optimization cycles. Options: <int>
Expand Down
7 changes: 7 additions & 0 deletions src/mindlessgen/cli/cli_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,12 @@ def cli_parser(argv: Sequence[str] | None = None) -> dict:
required=False,
help="Contract the coordinates of the molecule after the coordinats generation.",
)
parser.add_argument(
"--molecular-charge",
type=str,
required=False,
help="Define the charge of the molecule.",
)

### Refinement arguments ###
parser.add_argument(
Expand Down Expand Up @@ -280,6 +286,7 @@ def cli_parser(argv: Sequence[str] | None = None) -> dict:
"scale_fragment_detection": args_dict["scale_fragment_detection"],
"scale_minimal_distance": args_dict["scale_minimal_distance"],
"contract_coords": args_dict["contract_coords"],
"molecular_charge": args_dict["molecular_charge"],
}
# XTB specific arguments
rev_args_dict["xtb"] = {"xtb_path": args_dict["xtb_path"]}
Expand Down
6 changes: 6 additions & 0 deletions src/mindlessgen/generator/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,12 @@ def single_molecule_generator(
print(e)
stop_event.set()
return None
except RuntimeError as e:
if config.general.verbosity > 0:
print(f"Generation failed for cycle {cycle + 1}.")
if config.general.verbosity > 1:
print(e)
return None

try:
# ____ _ _ _
Expand Down
96 changes: 94 additions & 2 deletions src/mindlessgen/molecules/generate_molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,15 @@
from .refinement import get_cov_radii, COV_RADII
from .miscellaneous import (
set_random_charge,
calculate_protons,
get_alkali_metals,
get_alkaline_earth_metals,
get_three_d_metals,
get_four_d_metals,
get_five_d_metals,
get_lanthanides,
get_actinides,
calculate_ligand_electrons,
)


Expand Down Expand Up @@ -47,9 +50,12 @@ def generate_random_molecule(
ati=mol.ati,
scale_minimal_distance=config_generate.scale_minimal_distance,
)
mol.charge, mol.uhf = set_random_charge(mol.ati, verbosity)
if config_generate.molecular_charge is not None:
mol.charge = config_generate.molecular_charge
mol.uhf = 0
else:
mol.charge, mol.uhf = set_random_charge(mol.ati, verbosity)
mol.set_name_from_formula()

if verbosity > 1:
print(mol)

Expand Down Expand Up @@ -135,6 +141,22 @@ def generate_atom_list(cfg: GenerateConfig, verbosity: int = 1) -> np.ndarray:
+ "Setting the number of atoms for the fixed composition. "
+ f"Returning: \n{natoms}\n"
)
# If the molecular charge is defined, and a fixed element composition is defined, check if the electrons are even. If not raise an error.
if cfg.molecular_charge:
protons = calculate_protons(natoms)
nel = protons - cfg.molecular_charge
f_elem = any(
count > 0 and (i in get_lanthanides() or i in get_actinides())
for i, count in enumerate(natoms)
)
if (f_elem and calculate_ligand_electrons(natoms, nel) % 2 != 0) or (
not f_elem and nel % 2 != 0
):
raise SystemExit(
"Both fixed charge and fixed composition are defined. "
+ "Please only define one of them."
+ "Or ensure that the fixed composition is closed shell."
)
return natoms

# Reasoning for the parameters in the following sections:
Expand Down Expand Up @@ -324,6 +346,11 @@ def check_composition():
check_composition()
# Check if the number of atoms is within the defined limits
check_min_max_atoms()
# If the molecule is not closed shell, add an atom to ensure a closed shell system
if cfg.molecular_charge is not None:
protons = calculate_protons(natoms)
nel = protons - cfg.molecular_charge
natoms = fixed_charge_correction(cfg, natoms, nel, valid_elems, verbosity)
### ACTUAL WORKFLOW END ###

return natoms
Expand Down Expand Up @@ -431,3 +458,68 @@ def check_distances(
if r < scale_minimal_distance * sum_radii:
return False
return True


def fixed_charge_correction(
cfg: GenerateConfig,
natoms: np.ndarray,
nel: int,
valid_elems: set[int],
verbosity: int,
) -> np.ndarray:
"""
Correct the number of electrons if a fixed charge is given and the molecule is not closed shell.
"""
f_elem = any(
count > 0 and (i in get_lanthanides() or i in get_actinides())
for i, count in enumerate(natoms)
)
if f_elem:
ligand_electrons = calculate_ligand_electrons(natoms, nel)
# If f block elements are included, correct only if the remaning ligand protons are uneven
if ligand_electrons % 2 != 0:
natoms = fixed_charge_elem_correction(cfg, natoms, valid_elems, verbosity)
return natoms
# If f block elements are not included, correct if the number of electrons is uneven
elif nel % 2 != 0:
natoms = fixed_charge_elem_correction(cfg, natoms, valid_elems, verbosity)
return natoms
return natoms


def fixed_charge_elem_correction(
cfg: GenerateConfig,
natoms: np.ndarray,
valid_elems: set[int],
verbosity: int,
) -> np.ndarray:
"""
Correct the number of atoms if the number of electrons is odd and a molecular charge is set.
"""
num_atoms = np.sum(natoms)
# All other elements
rng = np.random.default_rng()
odd_atoms = np.array([elem for elem in valid_elems if elem % 2 == 0], dtype=int)
random_odd_atoms = rng.permutation(odd_atoms)
if 0 in valid_elems:
random_odd_atoms = np.insert(random_odd_atoms, 0, 0)
for random_elem in random_odd_atoms:
min_count, max_count = cfg.element_composition.get(
random_elem, (0, cfg.max_num_atoms)
)
if min_count is None:
min_count = 0
if max_count is None:
max_count = cfg.max_num_atoms
marcelmbn marked this conversation as resolved.
Show resolved Hide resolved
# Check if adding or removing the random element is possible
if natoms[random_elem] < max_count and num_atoms < cfg.max_num_atoms:
natoms[random_elem] += 1
if verbosity > 1:
print(f"Adding atom type {random_elem} for charge...")
return natoms
elif natoms[random_elem] > min_count and num_atoms > cfg.min_num_atoms:
natoms[random_elem] -= 1
if verbosity > 1:
print(f"Removing atom type {random_elem} for charge...")
return natoms
raise RuntimeError("Could not correct the odd number of electrons.")
32 changes: 31 additions & 1 deletion src/mindlessgen/molecules/miscellaneous.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
import numpy as np


def set_random_charge(ati: np.ndarray, verbosity: int = 1) -> tuple[int, int]:
def set_random_charge(
ati: np.ndarray,
verbosity: int = 1,
) -> tuple[int, int]:
"""
Set the charge of a molecule so that unpaired electrons are avoided.
"""
Expand Down Expand Up @@ -51,6 +54,7 @@ def set_random_charge(ati: np.ndarray, verbosity: int = 1) -> tuple[int, int]:
print(
f"Number of protons from ligands (assuming negative charge): {ligand_protons}"
)

if ligand_protons % 2 == 0:
charge = 0
else:
Expand Down Expand Up @@ -81,6 +85,32 @@ def set_random_charge(ati: np.ndarray, verbosity: int = 1) -> tuple[int, int]:
return charge, uhf


def calculate_protons(natoms: np.ndarray) -> int:
"""
Calculate the number of protons in a molecule from the atom list.
"""
protons = 0
for i, atom_count in enumerate(natoms):
protons += atom_count * (i + 1)
return protons


def calculate_ligand_electrons(natoms: np.ndarray, nel: int) -> int:
"""
Calculate the number of ligand electrons in a molecule if lanthanides or actinides are within the molecule.
"""
f_electrons = sum(
occurence
* (
ati - 3 + 1
jonathan-schoeps marked this conversation as resolved.
Show resolved Hide resolved
) # subtract 3 to get the number of electrons for an Ln3+ (Ac3+) ion.
for ati, occurence in enumerate(natoms)
jonathan-schoeps marked this conversation as resolved.
Show resolved Hide resolved
if (ati in get_lanthanides() or ati in get_actinides())
)
ligand_electrons = nel - f_electrons
return ligand_electrons


def get_alkali_metals() -> list[int]:
"""
Get the atomic numbers of alkali metals.
Expand Down
42 changes: 36 additions & 6 deletions src/mindlessgen/molecules/refinement.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,13 @@
from ..qm.base import QMMethod
from ..prog import GenerateConfig, RefineConfig
from .molecule import Molecule
from .miscellaneous import set_random_charge
from .miscellaneous import (
set_random_charge,
calculate_protons,
calculate_ligand_electrons,
get_lanthanides,
get_actinides,
)

COV_RADII = "pyykko"
BOHR2AA = (
Expand Down Expand Up @@ -50,6 +56,7 @@ def iterative_optimization(
# Detect fragments from the optimized molecule
fragmols = detect_fragments(
mol=rev_mol,
molecular_charge=config_generate.molecular_charge,
vdw_scaling=config_generate.scale_fragment_detection,
verbosity=verbosity,
)
Expand Down Expand Up @@ -109,7 +116,23 @@ def iterative_optimization(
f"Element {ati} is overrepresented "
+ f"in the largest fragment in cycle {cycle + 1}."
)

if config_generate.molecular_charge is not None:
protons = calculate_protons(fragmols[0].atlist)
nel = protons - config_generate.molecular_charge
f_elem = any(
count > 0 and (i in get_lanthanides() or i in get_actinides())
for i, count in enumerate(fragmols[0].atlist)
)
if f_elem:
ligand_electrons = calculate_ligand_electrons(fragmols[0].atlist, nel)
if ligand_electrons % 2 != 0:
raise RuntimeError(
f"Number of electrons in the largest fragment in cycle {cycle + 1} is odd."
)
elif nel % 2 != 0:
raise RuntimeError(
f"Number of electrons in the largest fragment in cycle {cycle + 1} is odd."
)
rev_mol = fragmols[
0
] # Set current_mol to the first fragment for the next cycle
Expand All @@ -136,7 +159,10 @@ def iterative_optimization(


def detect_fragments(
mol: Molecule, vdw_scaling: float, verbosity: int = 1
mol: Molecule,
molecular_charge: int,
vdw_scaling: float,
verbosity: int = 1,
) -> list[Molecule]:
"""
Detects fragments in a molecular system based on atom positions and covalent radii.
Expand Down Expand Up @@ -201,9 +227,13 @@ def detect_fragments(
for atom in fragment_molecule.ati:
fragment_molecule.atlist[atom] += 1
# Update the charge of the fragment molecule
fragment_molecule.charge, fragment_molecule.uhf = set_random_charge(
fragment_molecule.ati, verbosity
)
if molecular_charge is not None:
fragment_molecule.charge = molecular_charge
fragment_molecule.uhf = 0
else:
fragment_molecule.charge, fragment_molecule.uhf = set_random_charge(
fragment_molecule.ati, verbosity
)
fragment_molecule.set_name_from_formula()
if verbosity > 1:
print(f"Fragment molecule: {fragment_molecule}")
Expand Down
23 changes: 23 additions & 0 deletions src/mindlessgen/prog/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ def __init__(self: GenerateConfig) -> None:
self._scale_fragment_detection: float = 1.25
self._scale_minimal_distance: float = 0.8
self._contract_coords: bool = True
self._molecular_charge: int | None = None

def get_identifier(self) -> str:
return "generate"
Expand Down Expand Up @@ -402,6 +403,28 @@ def contract_coords(self, contract_coords: bool):
raise TypeError("Contract coords should be a boolean.")
self._contract_coords = contract_coords

@property
def molecular_charge(self):
"""
Get the molecular_charge.
"""
return self._molecular_charge

@molecular_charge.setter
def molecular_charge(self, molecular_charge: str | int):
"""
Set the molecular_charge.
"""
if isinstance(molecular_charge, str):
if molecular_charge.lower() == "none" or molecular_charge == "":
self._molecular_charge = None
else:
self._molecular_charge = int(molecular_charge)
elif isinstance(molecular_charge, int):
self._molecular_charge = molecular_charge
else:
raise TypeError("Molecular charge should be a string or an integer.")


class RefineConfig(BaseConfig):
"""
Expand Down
Loading