Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make scaling factor for van-der-Waals radii for fragment detection adjustable #45

Merged
merged 4 commits into from
Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
### Changed
- vdW radii scaling parameter can now be adjusted via `mindlessgen.toml` or CLI

### Fixed
- Unit conversion for (currenly unused) vdW radii from the original Fortran project
- minor print output issues (no new line breaks...)

## [0.4.0] - 2024-09-19
### Changed
- Default file name of `.xyz` file contains prefix `mlm_`
Expand Down
2 changes: 2 additions & 0 deletions mindlessgen.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ init_scaling = 3.0
increase_scaling_factor = 1.3
# > Distance threshold for the inital, randomly generated coordinates. Options: <float>
dist_threshold = 1.2
# > Scaling factor for the employed van der Waals radii. Options: <float>
scale_vdw_radii = 1.3333
# > Atom types and their minimum and maximum occurrences. Format: "<element>:<min_count>-<max_count>"
# > Elements that are not specified are only added by random selection.
# > A star sign (*) can be used as a wildcard for integer value.
Expand Down
7 changes: 7 additions & 0 deletions src/mindlessgen/cli/cli_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,12 @@ def cli_parser(argv: Sequence[str] | None = None) -> dict:
required=False,
help="Do not write the molecules to xyz files.",
)
parser.add_argument(
"--scale-vdw-radii",
type=float,
required=False,
help="Scaling factor for van der Waals radii.",
)

### Molecule generation arguments ###
parser.add_argument(
Expand Down Expand Up @@ -266,6 +272,7 @@ def cli_parser(argv: Sequence[str] | None = None) -> dict:
"dist_threshold": args_dict["dist_threshold"],
"element_composition": args_dict["element_composition"],
"forbidden_elements": args_dict["forbidden_elements"],
"scale_vdw_radii": args_dict["scale_vdw_radii"],
}
# XTB specific arguments
rev_args_dict["xtb"] = {"xtb_path": args_dict["xtb_path"]}
Expand Down
2 changes: 1 addition & 1 deletion src/mindlessgen/molecules/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def __str__(self) -> str:
if self._atlist.size:
if not first_line:
returnstr += "\n"
returnstr += f"atomic numbers: {self.atlist}"
returnstr += f"atomic numbers: {self.atlist}\n"
returnstr += f"sum formula: {self.sum_formula()}"
first_line = False
if self._xyz.size:
Expand Down
39 changes: 29 additions & 10 deletions src/mindlessgen/molecules/refinement.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
from .miscellaneous import set_random_charge

COV_RADII = "pyykko"
BOHR2AA = (
0.529177210544 # taken from https://physics.nist.gov/cgi-bin/cuu/Value?bohrrada0
)
AA2BOHR = 1 / BOHR2AA


def iterative_optimization(
Expand Down Expand Up @@ -44,7 +48,11 @@ def iterative_optimization(
print(f"Optimized molecule in cycle {cycle + 1}:\n{rev_mol.xyz}")

# Detect fragments from the optimized molecule
fragmols = detect_fragments(rev_mol, verbosity)
fragmols = detect_fragments(
mol=rev_mol,
vdw_scaling=config_generate.scale_vdw_radii,
verbosity=verbosity,
)

# Extract the number of atoms from each fragment for comparison
current_atom_counts = [fragmol.num_atoms for fragmol in fragmols]
Expand Down Expand Up @@ -127,7 +135,9 @@ def iterative_optimization(
return rev_mol


def detect_fragments(mol: Molecule, verbosity: int = 1) -> list[Molecule]:
def detect_fragments(
mol: Molecule, vdw_scaling: float, verbosity: int = 1
) -> list[Molecule]:
"""
Detects fragments in a molecular system based on atom positions and covalent radii.

Expand All @@ -152,7 +162,16 @@ def detect_fragments(mol: Molecule, verbosity: int = 1) -> list[Molecule]:
sum_radii = get_cov_radii(mol.ati[i], COV_RADII) + get_cov_radii(
mol.ati[j], COV_RADII
)
if distance <= sum_radii * 1:
if verbosity > 2:
print(f"Distance between atom {i} and {j}: {distance:6.3f}")
print(
f"Covalent radii of atom {i} and {j}, "
+ "and the effective threshold: "
+ f"{get_cov_radii(mol.ati[i], COV_RADII):6.3f}, "
+ f"{get_cov_radii(mol.ati[j], COV_RADII):6.3f}, "
+ f"{(sum_radii * vdw_scaling):6.3f}"
)
if distance <= sum_radii * vdw_scaling:
graph.add_edge(i, j)

# Detect connected components (fragments)
Expand Down Expand Up @@ -196,10 +215,10 @@ def detect_fragments(mol: Molecule, verbosity: int = 1) -> list[Molecule]:

def get_cov_radii(at: int, vdw_radii: str = "mlmgen") -> float:
"""
D3 covalent radii.
Get the covalent radius of an atom in Angstrom, and scale it by a factor.
"""
if vdw_radii == "mlmgen":
rcov = [
rcov = [ # CAUTION: array is given in units of Bohr!
0.80628308,
1.15903197,
3.02356173,
Expand Down Expand Up @@ -295,11 +314,13 @@ def get_cov_radii(at: int, vdw_radii: str = "mlmgen") -> float:
3.88023730,
3.90543362,
]
return rcov[at]
# multiply the whole array with the BOHR2AA factor to get the radii in Angstrom
rcov = [rad * BOHR2AA for rad in rcov]
elif vdw_radii == "pyykko":
# Covalent radii (taken from Pyykko and Atsumi, Chem. Eur. J. 15, 2009, 188-197)
# Values for metals decreased by 10%
covalent_rad_2009 = [
# D3 covalent radii used to construct the coordination number
rcov = [
0.32,
0.46, # H, He
1.20,
Expand Down Expand Up @@ -419,8 +440,6 @@ def get_cov_radii(at: int, vdw_radii: str = "mlmgen") -> float:
1.48,
1.57, # Nh-Og
]
# D3 covalent radii used to construct the coordination number
covalent_rad_d3 = [4.0 / 3.0 * rad for rad in covalent_rad_2009]
return covalent_rad_d3[at]
else:
raise ValueError("Invalid vdw_radii argument.")
return rcov[at]
19 changes: 19 additions & 0 deletions src/mindlessgen/prog/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ def __init__(self: GenerateConfig) -> None:
self._increase_scaling_factor: float = 1.3
self._element_composition: dict[int, tuple[int | None, int | None]] = {}
self._forbidden_elements: list[int] | None = None
self._scale_vdw_radii: float = 4.0 / 3.0

def get_identifier(self) -> str:
return "generate"
Expand Down Expand Up @@ -364,6 +365,24 @@ def forbidden_elements(self: GenerateConfig, forbidden_str: str) -> None:

self._forbidden_elements = sorted(list(forbidden_set))

@property
def scale_vdw_radii(self):
"""
Get the scaling factor for van der Waals radii.
"""
return self._scale_vdw_radii

@scale_vdw_radii.setter
def scale_vdw_radii(self, scale_vdw_radii: float):
"""
Set the scaling factor for van der Waals radii.
"""
if not isinstance(scale_vdw_radii, float):
raise TypeError("Scale van der Waals radii should be a float.")
if scale_vdw_radii <= 0:
raise ValueError("Scale van der Waals radii should be greater than 0.")
self._scale_vdw_radii = scale_vdw_radii


class RefineConfig(BaseConfig):
"""
Expand Down
4 changes: 3 additions & 1 deletion test/test_molecules/test_refinement.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,9 @@ def test_detect_fragments_H6O2B2Ne2I1Os1Tl1(
"""
Test the detection of fragments in the molecule H2O2B2I1Os1.
"""
fragmols = detect_fragments(mol_H6O2B2Ne2I1Os1Tl1, verbosity=0)
fragmols = detect_fragments(
mol=mol_H6O2B2Ne2I1Os1Tl1, vdw_scaling=1.3333, verbosity=0
)
assert len(fragmols) == 7

# check that the first fragment is equal to the molecule H2O2B2I1Os1
Expand Down