Skip to content

Commit

Permalink
Issue #59/migrate sdrmsd to rdock_utils (#67)
Browse files Browse the repository at this point in the history
* add numpy and openbabel as requirements

* add files to ignore

* refactor to python3 and add typing for superpose3D, squared_distance and rmsd functions

* add typing for coordinates
add and refactor  map_to_crystal function

* add parseArguments function with type annotations

* update variable and function names
add update_coordinates function with type annotations

* add get_automorphism_RMSD function with type annotations

* add save_molecule_with_RMSD function with type annotation

* refactor superpose_3D function

* update save_molecule_with_rmsd: remove unnecessary condition
add main function

* add aligned-fit sdf test file

* add aligned-nofit sdf test file

* add input sdf test file

* add ref sdf test file

* add conftest with fixtures folder path as constant

* add sdrms conftest file

* add aligned molecules raw data fixture

* add aligned fit molecule raw data fixture

* add integration tests

* code formatting

* add aligned_nofit_filename and aligned_fit_filename fixtures

* add testing comparison functions for molecules:
- atoms_are_equal
- molecules_are_equal
- compare_sdf_files

* extract parser to its own file

* add sdrmsd script package to pyproject.toml

* update github actions workflow for rdock-utils
  - add ruff
  - remove flake, black, isort
  - add pytest

* add dynamic dependencies to pyproject.toml

* remove unnecessary docstrings

* add old sdrmrsd to be execute as script

* lint and typing
  • Loading branch information
lpardey authored Jan 25, 2024
1 parent 4584dc1 commit b3a3a35
Show file tree
Hide file tree
Showing 21 changed files with 4,187 additions and 8 deletions.
17 changes: 16 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,19 @@ build/linux-g++-64

# test files
*.as
tests/results
tests/results

# rdock original files
rdock-utils/scripts copy/make_grid.csh
rdock-utils/scripts copy/rbhtfinder
rdock-utils/scripts copy/run_rbdock.pl
rdock-utils/scripts copy/run_rbscreen.pl
rdock-utils/scripts copy/sdfield
rdock-utils/scripts copy/sdfilter
rdock-utils/scripts copy/sdmodify
rdock-utils/scripts copy/sdreport
rdock-utils/scripts copy/sdrmsd
rdock-utils/scripts copy/sdsort
rdock-utils/scripts copy/sdsplit
rdock-utils/scripts copy/sdtether
rdock-utils/scripts copy/to_unix
13 changes: 13 additions & 0 deletions rdock-utils/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,18 @@
FROM python:3.12 AS base

RUN DEBIAN_FRONTEND=noninteractive apt update -y \
&& apt install -y --no-install-recommends \
swig \
openbabel \
libopenbabel-dev \
&& rm -rf /var/lib/apt/lists/*

# this is a bit of a mess but it is a quick way to be able to
# pip install openbable, we will improve it later
RUN ln -s /usr/include/openbabel3 /usr/local/include/openbabel3
RUN ln -s /lib/x86_64-linux-gnu/libopenbabel.so /usr/local/lib/libopenbabel.so


COPY requirements.txt requirements.txt
RUN python -m pip install --upgrade pip
RUN python -m pip install -r requirements.txt
Expand Down
11 changes: 9 additions & 2 deletions rdock-utils/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,19 +1,26 @@
[project]
dynamic = ["dependencies", "optional-dependencies"]
name = "rdock-utils"
version = "0.1.0"
description = "Utilities for working with RDock and operating on SD files"
requires-python = ">=3.10.0"

[tool.setuptools.dynamic]
dependencies = {file = ["requirements.txt"]}
optional-dependencies = { dev = {file = ["requirements-dev.txt"]} }

[project.scripts]
sdfield = "rdock_utils.sdfield:main"
sdrmsd_old = "rdock_utils.sdrmsd_original:main"
sdrmsd = "rdock_utils.sdrmsd.main:main"

[project.urls]
Repository = "https://github.com/CBDD/rDock.git"

[tool.ruff]
line-length = 119
target-version = "py312"
exclude = [".git","__pycache__"]
exclude = [".git", "__pycache__", "rdock_utils/sdrmsd_original.py"]

[tool.ruff.lint]
select = ["E4", "E7", "E9", "F", "I"]
Expand Down Expand Up @@ -54,4 +61,4 @@ no_implicit_reexport = false

strict_equality = true

exclude = ['^setup\.py$']
exclude = ["rdock_utils/sdrmsd_original.py", "tests/"]
2 changes: 0 additions & 2 deletions rdock-utils/rdock_utils/sdnothing.py

This file was deleted.

Empty file.
13 changes: 13 additions & 0 deletions rdock-utils/rdock_utils/sdrmsd/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from .parser import get_parser
from .sdrmsd import SDRMSD


def main(argv: list[str] | None = None) -> None:
parser = get_parser()
args = parser.parse_args(argv)
sdrmsd = SDRMSD(args.reference, args.input, args.out, args.fit, args.threshold)
sdrmsd.run()


if __name__ == "__main__":
main()
54 changes: 54 additions & 0 deletions rdock-utils/rdock_utils/sdrmsd/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import argparse


def get_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(
prog="SDRMSD",
usage="%(prog)s [options] reference.sdf input.sdf",
description="Superpose molecules before RMSD calculation",
epilog=(
"Arguments:\n"
" reference.sdf SDF file with the reference molecule.\n"
" input.sdf SDF file with the molecules to be compared to reference.\n"
),
)
parser.add_argument(
"reference",
type=str,
help="Path to the SDF file with the reference molecule.",
)
parser.add_argument(
"input",
type=str,
help="Path to the SDF file with the molecules to be compared to reference.",
)
parser.add_argument(
"-f",
"--fit",
action="store_true",
default=False,
help="Superpose molecules before RMSD calculation",
)
parser.add_argument(
"-t",
"--threshold",
action="store",
default=None,
type=float,
help=(
"Discard poses with RMSD < THRESHOLD with respect previous poses "
"which were not rejected based on the same principle. A Population "
"SDField will be added to output SD with the population number."
),
)
parser.add_argument(
"-o",
"--out",
default=None,
metavar="FILE",
help=(
"If declared, write an output SDF file with the input molecules with "
"a new sdfield <RMSD>. If the molecule was fitted, the fitted molecule coordinates will be saved."
),
)
return parser
148 changes: 148 additions & 0 deletions rdock-utils/rdock_utils/sdrmsd/sdrmsd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
import functools
import logging
import math

from openbabel import pybel

from .superpose3d import MolAlignmentData, Superpose3D
from .types import AutomorphismRMSD, CoordsArray, PoseMatchData, SDRMSDData

logger = logging.getLogger("SDRMSD")


class SDRMSD:
def __init__(
self,
reference_filename: str,
input_filename: str,
output_filename: str,
fit: bool = False,
threshold: float | None = None,
) -> None:
self.reference_filename = reference_filename
self.input_filename = input_filename
self.fit = fit
self.output_filename = output_filename
self.threshold = threshold

def run(self) -> None:
# Find the RMSD between the crystal pose and each docked pose
docked_poses = pybel.readfile("sdf", self.input_filename)
self.display_fit_message()
data = SDRMSDData()

# Read crystal pose
crystal_pose = self.get_crystal_pose()
crystal_atoms = len(crystal_pose.atoms)

for i, docked_pose in enumerate(docked_poses, start=1):
atoms_number = self.process_docked_pose(docked_pose)

if atoms_number != crystal_atoms:
data.skipped.append(i)
continue

rmsd_result = self.calculate_rmsd(crystal_pose, docked_pose)
pose_match_data = PoseMatchData(i, docked_pose, data)
self.handle_pose_matching(rmsd_result, pose_match_data)

if self.output_filename:
self.process_and_save_selected_molecules(data)

if data.skipped:
logger.warning(f"SKIPPED input molecules due to the number of atom mismatch: {data.skipped}")

def get_automorphism_rmsd(self, target: pybel.Molecule, molecule: pybel.Molecule) -> AutomorphismRMSD:
superposer = Superpose3D(MolAlignmentData(molecule), MolAlignmentData(target))
result = superposer.automorphism_rmsd(self.fit)
return result

def update_coordinates(self, obmol: pybel.Molecule, new_coordinates: CoordsArray) -> None:
for i, atom in enumerate(obmol):
atom.OBAtom.SetVector(*new_coordinates[i])

def get_best_matching_pose(self, pose_match_data: PoseMatchData) -> tuple[int | None, float]:
threshold = self.threshold or math.inf
docked_pose = pose_match_data.docked_pose
molecules_dict = pose_match_data.sdrmsd_data.molecules_dict
get_rmsd = functools.partial(self.get_automorphism_rmsd, target=docked_pose)
poses_rmsd = ((index, get_rmsd(molecule)[0]) for index, molecule in molecules_dict.items())
filtered_by_threshold = (t for t in poses_rmsd if t[1] < threshold)
return min(filtered_by_threshold, key=lambda t: t[1], default=(None, math.inf))

def process_and_save_selected_molecules(self, data: SDRMSDData) -> None:
with pybel.Outputfile("sdf", self.output_filename, overwrite=True) as output_sdf:
for i in sorted(data.out_dict.keys()):
molecule, rmsd_result = data.out_dict[i]
# Get the number of matches in the thresholding operation
population_value = data.population.get(i, 1)
self.save_molecule_with_rmsd(output_sdf, molecule, rmsd_result, population_value)

def save_molecule_with_rmsd(
self, output_sdf: pybel.Outputfile, molecule: pybel.Molecule, rmsd: float, population_value: int
) -> None:
new_data = pybel.ob.OBPairData()
new_data.SetAttribute("RMSD")
new_data.SetValue(f"{rmsd:.3f}")

if self.threshold:
pop_data = pybel.ob.OBPairData()
pop_data.SetAttribute("Population")
pop_data.SetValue(f"{population_value}")
molecule.OBMol.CloneData(pop_data)

molecule.OBMol.CloneData(new_data)
output_sdf.write(molecule)

def get_crystal_pose(self) -> pybel.Molecule:
crystal_pose = next(pybel.readfile("sdf", self.reference_filename))
crystal_pose.removeh()
return crystal_pose

def display_fit_message(self) -> None:
message = "FIT" if self.fit else "NOFIT"
print(f"POSE\tRMSD_{message}")

def process_docked_pose(self, docked_pose: pybel.Molecule) -> int:
docked_pose.removeh()
return len(docked_pose.atoms)

def calculate_rmsd(self, crystal: pybel.Molecule, docked_pose: pybel.Molecule) -> float:
rmsd, fitted_coords = self.get_automorphism_rmsd(crystal, docked_pose)

if self.fit:
if fitted_coords is not None:
self.update_coordinates(docked_pose, fitted_coords)
else:
logger.warning("Automorphism failed. skipping alignment")

return rmsd

def handle_pose_matching(self, rmsd: float, pose_match_data: PoseMatchData) -> None:
if self.threshold:
match_pose, best_match_value = self.get_best_matching_pose(pose_match_data)
if match_pose is not None:
self.print_matching_info(pose_match_data, match_pose, best_match_value)
else:
self.save_and_print_info(rmsd, pose_match_data)
else:
self.save_and_print_info(rmsd, pose_match_data)

def print_matching_info(self, pose_match_data: PoseMatchData, match_pose: int, best_match_value: float) -> None:
index = pose_match_data.pose_index
population = pose_match_data.sdrmsd_data.population
logger.info(f"Pose {index} matches pose {match_pose} with {best_match_value:.3f} RMSD")
population[match_pose] += 1

def save_and_print_info(self, rmsd: float, pose_match_data: PoseMatchData) -> None:
index = pose_match_data.pose_index
docked_pose = pose_match_data.docked_pose
out_dict = pose_match_data.sdrmsd_data.out_dict
molecules_dict = pose_match_data.sdrmsd_data.molecules_dict
population = pose_match_data.sdrmsd_data.population

if self.output_filename:
out_dict[index] = (docked_pose, rmsd)
print(f"{index}\t{rmsd:.2f}")
molecules_dict[index] = docked_pose
population[index] = 1
Loading

0 comments on commit b3a3a35

Please sign in to comment.