-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
* add numpy and openbabel as requirements * add files to ignore * refactor to python3 and add typing for superpose3D, squared_distance and rmsd functions * add typing for coordinates add and refactor map_to_crystal function * add parseArguments function with type annotations * update variable and function names add update_coordinates function with type annotations * add get_automorphism_RMSD function with type annotations * add save_molecule_with_RMSD function with type annotation * refactor superpose_3D function * update save_molecule_with_rmsd: remove unnecessary condition add main function * add aligned-fit sdf test file * add aligned-nofit sdf test file * add input sdf test file * add ref sdf test file * add conftest with fixtures folder path as constant * add sdrms conftest file * add aligned molecules raw data fixture * add aligned fit molecule raw data fixture * add integration tests * code formatting * add aligned_nofit_filename and aligned_fit_filename fixtures * add testing comparison functions for molecules: - atoms_are_equal - molecules_are_equal - compare_sdf_files * extract parser to its own file * add sdrmsd script package to pyproject.toml * update github actions workflow for rdock-utils - add ruff - remove flake, black, isort - add pytest * add dynamic dependencies to pyproject.toml * remove unnecessary docstrings * add old sdrmrsd to be execute as script * lint and typing
- Loading branch information
1 parent
c14f0fd
commit 5c7b9ea
Showing
21 changed files
with
4,187 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
from .parser import get_parser | ||
from .sdrmsd import SDRMSD | ||
|
||
|
||
def main(argv: list[str] | None = None) -> None: | ||
parser = get_parser() | ||
args = parser.parse_args(argv) | ||
sdrmsd = SDRMSD(args.reference, args.input, args.out, args.fit, args.threshold) | ||
sdrmsd.run() | ||
|
||
|
||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
import argparse | ||
|
||
|
||
def get_parser() -> argparse.ArgumentParser: | ||
parser = argparse.ArgumentParser( | ||
prog="SDRMSD", | ||
usage="%(prog)s [options] reference.sdf input.sdf", | ||
description="Superpose molecules before RMSD calculation", | ||
epilog=( | ||
"Arguments:\n" | ||
" reference.sdf SDF file with the reference molecule.\n" | ||
" input.sdf SDF file with the molecules to be compared to reference.\n" | ||
), | ||
) | ||
parser.add_argument( | ||
"reference", | ||
type=str, | ||
help="Path to the SDF file with the reference molecule.", | ||
) | ||
parser.add_argument( | ||
"input", | ||
type=str, | ||
help="Path to the SDF file with the molecules to be compared to reference.", | ||
) | ||
parser.add_argument( | ||
"-f", | ||
"--fit", | ||
action="store_true", | ||
default=False, | ||
help="Superpose molecules before RMSD calculation", | ||
) | ||
parser.add_argument( | ||
"-t", | ||
"--threshold", | ||
action="store", | ||
default=None, | ||
type=float, | ||
help=( | ||
"Discard poses with RMSD < THRESHOLD with respect previous poses " | ||
"which were not rejected based on the same principle. A Population " | ||
"SDField will be added to output SD with the population number." | ||
), | ||
) | ||
parser.add_argument( | ||
"-o", | ||
"--out", | ||
default=None, | ||
metavar="FILE", | ||
help=( | ||
"If declared, write an output SDF file with the input molecules with " | ||
"a new sdfield <RMSD>. If the molecule was fitted, the fitted molecule coordinates will be saved." | ||
), | ||
) | ||
return parser |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
import functools | ||
import logging | ||
import math | ||
|
||
from openbabel import pybel | ||
|
||
from .superpose3d import MolAlignmentData, Superpose3D | ||
from .types import AutomorphismRMSD, CoordsArray, PoseMatchData, SDRMSDData | ||
|
||
logger = logging.getLogger("SDRMSD") | ||
|
||
|
||
class SDRMSD: | ||
def __init__( | ||
self, | ||
reference_filename: str, | ||
input_filename: str, | ||
output_filename: str, | ||
fit: bool = False, | ||
threshold: float | None = None, | ||
) -> None: | ||
self.reference_filename = reference_filename | ||
self.input_filename = input_filename | ||
self.fit = fit | ||
self.output_filename = output_filename | ||
self.threshold = threshold | ||
|
||
def run(self) -> None: | ||
# Find the RMSD between the crystal pose and each docked pose | ||
docked_poses = pybel.readfile("sdf", self.input_filename) | ||
self.display_fit_message() | ||
data = SDRMSDData() | ||
|
||
# Read crystal pose | ||
crystal_pose = self.get_crystal_pose() | ||
crystal_atoms = len(crystal_pose.atoms) | ||
|
||
for i, docked_pose in enumerate(docked_poses, start=1): | ||
atoms_number = self.process_docked_pose(docked_pose) | ||
|
||
if atoms_number != crystal_atoms: | ||
data.skipped.append(i) | ||
continue | ||
|
||
rmsd_result = self.calculate_rmsd(crystal_pose, docked_pose) | ||
pose_match_data = PoseMatchData(i, docked_pose, data) | ||
self.handle_pose_matching(rmsd_result, pose_match_data) | ||
|
||
if self.output_filename: | ||
self.process_and_save_selected_molecules(data) | ||
|
||
if data.skipped: | ||
logger.warning(f"SKIPPED input molecules due to the number of atom mismatch: {data.skipped}") | ||
|
||
def get_automorphism_rmsd(self, target: pybel.Molecule, molecule: pybel.Molecule) -> AutomorphismRMSD: | ||
superposer = Superpose3D(MolAlignmentData(molecule), MolAlignmentData(target)) | ||
result = superposer.automorphism_rmsd(self.fit) | ||
return result | ||
|
||
def update_coordinates(self, obmol: pybel.Molecule, new_coordinates: CoordsArray) -> None: | ||
for i, atom in enumerate(obmol): | ||
atom.OBAtom.SetVector(*new_coordinates[i]) | ||
|
||
def get_best_matching_pose(self, pose_match_data: PoseMatchData) -> tuple[int | None, float]: | ||
threshold = self.threshold or math.inf | ||
docked_pose = pose_match_data.docked_pose | ||
molecules_dict = pose_match_data.sdrmsd_data.molecules_dict | ||
get_rmsd = functools.partial(self.get_automorphism_rmsd, target=docked_pose) | ||
poses_rmsd = ((index, get_rmsd(molecule)[0]) for index, molecule in molecules_dict.items()) | ||
filtered_by_threshold = (t for t in poses_rmsd if t[1] < threshold) | ||
return min(filtered_by_threshold, key=lambda t: t[1], default=(None, math.inf)) | ||
|
||
def process_and_save_selected_molecules(self, data: SDRMSDData) -> None: | ||
with pybel.Outputfile("sdf", self.output_filename, overwrite=True) as output_sdf: | ||
for i in sorted(data.out_dict.keys()): | ||
molecule, rmsd_result = data.out_dict[i] | ||
# Get the number of matches in the thresholding operation | ||
population_value = data.population.get(i, 1) | ||
self.save_molecule_with_rmsd(output_sdf, molecule, rmsd_result, population_value) | ||
|
||
def save_molecule_with_rmsd( | ||
self, output_sdf: pybel.Outputfile, molecule: pybel.Molecule, rmsd: float, population_value: int | ||
) -> None: | ||
new_data = pybel.ob.OBPairData() | ||
new_data.SetAttribute("RMSD") | ||
new_data.SetValue(f"{rmsd:.3f}") | ||
|
||
if self.threshold: | ||
pop_data = pybel.ob.OBPairData() | ||
pop_data.SetAttribute("Population") | ||
pop_data.SetValue(f"{population_value}") | ||
molecule.OBMol.CloneData(pop_data) | ||
|
||
molecule.OBMol.CloneData(new_data) | ||
output_sdf.write(molecule) | ||
|
||
def get_crystal_pose(self) -> pybel.Molecule: | ||
crystal_pose = next(pybel.readfile("sdf", self.reference_filename)) | ||
crystal_pose.removeh() | ||
return crystal_pose | ||
|
||
def display_fit_message(self) -> None: | ||
message = "FIT" if self.fit else "NOFIT" | ||
print(f"POSE\tRMSD_{message}") | ||
|
||
def process_docked_pose(self, docked_pose: pybel.Molecule) -> int: | ||
docked_pose.removeh() | ||
return len(docked_pose.atoms) | ||
|
||
def calculate_rmsd(self, crystal: pybel.Molecule, docked_pose: pybel.Molecule) -> float: | ||
rmsd, fitted_coords = self.get_automorphism_rmsd(crystal, docked_pose) | ||
|
||
if self.fit: | ||
if fitted_coords is not None: | ||
self.update_coordinates(docked_pose, fitted_coords) | ||
else: | ||
logger.warning("Automorphism failed. skipping alignment") | ||
|
||
return rmsd | ||
|
||
def handle_pose_matching(self, rmsd: float, pose_match_data: PoseMatchData) -> None: | ||
if self.threshold: | ||
match_pose, best_match_value = self.get_best_matching_pose(pose_match_data) | ||
if match_pose is not None: | ||
self.print_matching_info(pose_match_data, match_pose, best_match_value) | ||
else: | ||
self.save_and_print_info(rmsd, pose_match_data) | ||
else: | ||
self.save_and_print_info(rmsd, pose_match_data) | ||
|
||
def print_matching_info(self, pose_match_data: PoseMatchData, match_pose: int, best_match_value: float) -> None: | ||
index = pose_match_data.pose_index | ||
population = pose_match_data.sdrmsd_data.population | ||
logger.info(f"Pose {index} matches pose {match_pose} with {best_match_value:.3f} RMSD") | ||
population[match_pose] += 1 | ||
|
||
def save_and_print_info(self, rmsd: float, pose_match_data: PoseMatchData) -> None: | ||
index = pose_match_data.pose_index | ||
docked_pose = pose_match_data.docked_pose | ||
out_dict = pose_match_data.sdrmsd_data.out_dict | ||
molecules_dict = pose_match_data.sdrmsd_data.molecules_dict | ||
population = pose_match_data.sdrmsd_data.population | ||
|
||
if self.output_filename: | ||
out_dict[index] = (docked_pose, rmsd) | ||
print(f"{index}\t{rmsd:.2f}") | ||
molecules_dict[index] = docked_pose | ||
population[index] = 1 |
Oops, something went wrong.