From 2be6ae9ad4dde03be279b31041170e38085ff668 Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Mon, 14 Nov 2022 16:01:48 -0600 Subject: [PATCH] Lint --- openff/qcsubmit/datasets/datasets.py | 53 ++++---- openff/qcsubmit/procedures.py | 2 +- openff/qcsubmit/results/caching.py | 18 ++- openff/qcsubmit/results/filters.py | 23 +++- openff/qcsubmit/results/results.py | 126 ++++++++++++------ openff/qcsubmit/tests/results/__init__.py | 10 +- openff/qcsubmit/tests/results/test_caching.py | 2 +- openff/qcsubmit/tests/results/test_filters.py | 2 +- openff/qcsubmit/tests/results/test_results.py | 13 +- 9 files changed, 153 insertions(+), 96 deletions(-) diff --git a/openff/qcsubmit/datasets/datasets.py b/openff/qcsubmit/datasets/datasets.py index 3989b457..af9af83b 100644 --- a/openff/qcsubmit/datasets/datasets.py +++ b/openff/qcsubmit/datasets/datasets.py @@ -25,15 +25,11 @@ from qcportal.datasets.optimization import OptimizationDatasetNewEntry from qcportal.datasets.singlepoint import SinglepointDatasetNewEntry from qcportal.datasets.torsiondrive import TorsiondriveDatasetNewEntry -from qcportal.records.singlepoint import SinglepointDriver, QCSpecification from qcportal.records.optimization import OptimizationSpecification +from qcportal.records.singlepoint import QCSpecification, SinglepointDriver from typing_extensions import Literal -from openff.qcsubmit.common_structures import ( - CommonBase, - Metadata, - MoleculeAttributes, -) +from openff.qcsubmit.common_structures import CommonBase, Metadata, MoleculeAttributes from openff.qcsubmit.constraints import Constraints from openff.qcsubmit.datasets.entries import ( DatasetEntry, @@ -150,7 +146,6 @@ def _get_specifications(self) -> "OptimizationSpecification": """ raise NotImplementedError() - @abc.abstractmethod def _get_entries(self) -> List[Any]: """Add entries to the Dataset's corresponding Collection. @@ -167,7 +162,6 @@ def _get_entries(self) -> List[Any]: """ pass - @abc.abstractmethod def to_tasks(self) -> Dict[str, List[Union[AtomicInput, OptimizationInput]]]: """ @@ -199,7 +193,6 @@ def submit( """ - # pre submission checks # make sure we have some QCSpec to submit self._check_qc_specs() @@ -738,7 +731,6 @@ def _molecules_to_inchikey(self) -> List[str]: return inchikey - # TODO: SinglepointDataset class BasicDataset(_BaseDataset): """ @@ -799,7 +791,9 @@ def __add__(self, other: "BasicDataset") -> "BasicDataset": return new_dataset - def _generate_collection(self, client: "PortalClient") -> ptl.datasets.SinglepointDataset: + def _generate_collection( + self, client: "PortalClient" + ) -> ptl.datasets.SinglepointDataset: return client.add_dataset( dataset_type="singlepoint", @@ -817,7 +811,7 @@ def _get_specifications(self) -> Dict[str, QCSpecification]: """Needed for `submit` usage.""" ret = {} - for spec_name,spec in self.qc_specifications.items(): + for spec_name, spec in self.qc_specifications.items(): ret[spec_name] = QCSpecification( driver=self.driver, method=spec.method, @@ -829,7 +823,6 @@ def _get_specifications(self) -> Dict[str, QCSpecification]: return ret - def _get_entries(self) -> List[SinglepointDatasetNewEntry]: entries: List[SinglepointDatasetNewEntry] = [] @@ -844,13 +837,16 @@ def _get_entries(self) -> List[SinglepointDatasetNewEntry]: for j, molecule in enumerate(entry.initial_molecules): name = index + f"-{tag + j}" - entries.append(SinglepointDatasetNewEntry(name=name, molecule=molecule)) + entries.append( + SinglepointDatasetNewEntry(name=name, molecule=molecule) + ) else: entries.append( SinglepointDatasetNewEntry( name=entry_name, molecule=entry.initial_molecules[0], - )) + ) + ) return entries @@ -1008,7 +1004,7 @@ def _get_specifications(self) -> Dict[str, OptimizationSpecification]: ret = {} - for spec_name,spec in self.qc_specifications.items(): + for spec_name, spec in self.qc_specifications.items(): qc_spec = QCSpecification( driver=self.driver, method=spec.method, @@ -1021,7 +1017,7 @@ def _get_specifications(self) -> Dict[str, OptimizationSpecification]: ret[spec_name] = OptimizationSpecification( program=self.optimization_procedure.program, qc_specification=qc_spec, - keywords=opt_kw + keywords=opt_kw, ) return ret @@ -1040,17 +1036,21 @@ def _get_entries(self) -> List[OptimizationDatasetNewEntry]: for j, molecule in enumerate(entry.initial_molecules): name = index + f"-{tag + j}" - entries.append(OptimizationDatasetNewEntry(name=name, initial_molecule=molecule)) + entries.append( + OptimizationDatasetNewEntry( + name=name, initial_molecule=molecule + ) + ) else: entries.append( OptimizationDatasetNewEntry( name=entry_name, initial_molecule=entry.initial_molecules[0], - )) + ) + ) return entries - def to_tasks(self) -> Dict[str, List[OptimizationInput]]: """ Build a list of QCEngine optimisation inputs organised by the optimisation engine which should be used to run the task. @@ -1222,15 +1222,16 @@ def _get_entries(self) -> List[TorsiondriveDatasetNewEntry]: td_keywords.update(entry.keywords.dict(exclude_defaults=True)) - entries.append(TorsiondriveDatasetNewEntry( - name=entry_name, - initial_molecules=entry.initial_molecules, - torsiondrive_keywords=td_keywords - )) + entries.append( + TorsiondriveDatasetNewEntry( + name=entry_name, + initial_molecules=entry.initial_molecules, + torsiondrive_keywords=td_keywords, + ) + ) return entries - def to_tasks(self) -> Dict[str, List[OptimizationInput]]: """Build a list of QCEngine procedure tasks which correspond to this dataset.""" diff --git a/openff/qcsubmit/procedures.py b/openff/qcsubmit/procedures.py index 37d67931..49beccf5 100644 --- a/openff/qcsubmit/procedures.py +++ b/openff/qcsubmit/procedures.py @@ -2,7 +2,7 @@ The procedure settings controllers """ -from typing import Dict, Any +from typing import Any, Dict from pydantic import BaseModel, Field, validator from qcportal.records.optimization import OptimizationSpecification diff --git a/openff/qcsubmit/results/caching.py b/openff/qcsubmit/results/caching.py index 4d6871b2..c0cbdb67 100644 --- a/openff/qcsubmit/results/caching.py +++ b/openff/qcsubmit/results/caching.py @@ -10,8 +10,12 @@ from openff.units import unit from qcportal import PortalClient from qcportal.molecules import Molecule as QCMolecule -from qcportal.records import TorsiondriveRecord -from qcportal.records import OptimizationRecord, BaseRecord, SinglepointRecord +from qcportal.records import ( + BaseRecord, + OptimizationRecord, + SinglepointRecord, + TorsiondriveRecord, +) if TYPE_CHECKING: from openff.qcsubmit.results.results import ( @@ -155,7 +159,7 @@ def cached_query_procedures(client_address: str, record_ids: List[str]) -> List[ client_address = client_address.rstrip("/") client = cached_fractal_client(client_address) - query_limit = client.api_limits['get_records'] + query_limit = client.api_limits["get_records"] return _cached_client_query( client_address, @@ -182,7 +186,7 @@ def cached_query_molecules( client_address = client_address.rstrip("/") client = cached_fractal_client(client_address) - query_limit = client.api_limits['get_molecules'] + query_limit = client.api_limits["get_molecules"] return _cached_client_query( client_address, @@ -326,8 +330,10 @@ def cached_query_torsion_drive_results( qc_record = qc_records[result.record_id] - qc_grid_molecules = [(grid_point, opt.final_molecule) - for grid_point, opt in qc_record.minimum_optimizations.items()] + qc_grid_molecules = [ + (grid_point, opt.final_molecule) + for grid_point, opt in qc_record.minimum_optimizations.items() + ] grid_ids = [*qc_record.minimum_positions] # order the ids so the conformers follow the torsiondrive scan range grid_ids.sort(key=lambda s: tuple(float(x) for x in s.strip("[]").split(", "))) diff --git a/openff/qcsubmit/results/filters.py b/openff/qcsubmit/results/filters.py index e72aa3a2..81eb4e95 100644 --- a/openff/qcsubmit/results/filters.py +++ b/openff/qcsubmit/results/filters.py @@ -17,8 +17,8 @@ from pydantic import BaseModel, Field, PrivateAttr, root_validator, validator from qcelemental.molutil import guess_connectivity from qcportal.records import ( - OptimizationRecord, BaseRecord, + OptimizationRecord, RecordStatusEnum, SinglepointRecord, ) @@ -230,7 +230,12 @@ class LowestEnergyFilter(SinglepointRecordGroupFilter): def _filter_function( self, entries: List[ - Tuple["_BaseResult", Union[SinglepointRecord, OptimizationRecord], Molecule, str] + Tuple[ + "_BaseResult", + Union[SinglepointRecord, OptimizationRecord], + Molecule, + str, + ] ], ) -> List[Tuple["_BaseResult", str]]: """Only return the lowest energy entry or final molecule.""" @@ -362,7 +367,12 @@ def _compute_rmsd_matrix(self, molecule: Molecule) -> numpy.ndarray: def _filter_function( self, entries: List[ - Tuple["_BaseResult", Union[SinglepointRecord, OptimizationRecord], Molecule, str] + Tuple[ + "_BaseResult", + Union[SinglepointRecord, OptimizationRecord], + Molecule, + str, + ] ], ) -> List[Tuple["_BaseResult", str]]: @@ -436,7 +446,12 @@ class MinimumConformersFilter(SinglepointRecordGroupFilter): def _filter_function( self, entries: List[ - Tuple["_BaseResult", Union[SinglepointRecord, OptimizationRecord], Molecule, str] + Tuple[ + "_BaseResult", + Union[SinglepointRecord, OptimizationRecord], + Molecule, + str, + ] ], ) -> List[Tuple["_BaseResult", str]]: diff --git a/openff/qcsubmit/results/results.py b/openff/qcsubmit/results/results.py index 04f775bd..6deb7ece 100644 --- a/openff/qcsubmit/results/results.py +++ b/openff/qcsubmit/results/results.py @@ -21,17 +21,23 @@ from openmm import unit except ImportError: from simtk import unit + import numpy import qcportal from openff.toolkit.topology import Molecule from openff.toolkit.typing.engines.smirnoff import ForceField from pydantic import BaseModel, Field, validator -from qcportal.datasets import OptimizationDataset, TorsiondriveDataset from qcportal.datasets import BaseDataset as QCDataset +from qcportal.datasets import OptimizationDataset, TorsiondriveDataset from qcportal.datasets.singlepoint import SinglepointDataset, SinglepointDatasetNewEntry -from qcportal.records import OptimizationRecord, SinglepointRecord, TorsiondriveRecord, RecordStatusEnum +from qcportal.records import ( + BaseRecord, + OptimizationRecord, + RecordStatusEnum, + SinglepointRecord, + TorsiondriveRecord, +) from qcportal.records.singlepoint import SinglepointDriver -from qcportal.records import BaseRecord from typing_extensions import Literal from openff.qcsubmit.common_structures import Metadata, MoleculeAttributes, QCSpec @@ -286,7 +292,9 @@ def from_datasets( datasets = [datasets] if not all(isinstance(dataset, SinglepointDataset) for dataset in datasets): - raise TypeError("A ``BasicResultCollection`` can only be created from ``SinglepointDataset`` objects.") + raise TypeError( + "A ``BasicResultCollection`` can only be created from ``SinglepointDataset`` objects." + ) result_records = defaultdict(dict) @@ -295,27 +303,36 @@ def from_datasets( client = dataset.client # Fetch all entries for use later - dataset.fetch_entries(include=['molecule']) + dataset.fetch_entries(include=["molecule"]) if spec_name not in dataset.specifications: - raise KeyError(f"The {dataset.name} dataset does not contain a '{spec_name}' compute specification") + raise KeyError( + f"The {dataset.name} dataset does not contain a '{spec_name}' compute specification" + ) - for entry_name, spec_name, record in dataset.iterate_records(specification_names=spec_name, - status=RecordStatusEnum.complete): + for entry_name, spec_name, record in dataset.iterate_records( + specification_names=spec_name, status=RecordStatusEnum.complete + ): entry = dataset.get_entry(entry_name) molecule = entry.molecule - cmiles = molecule.extras["canonical_isomeric_explicit_hydrogen_mapped_smiles"] + cmiles = molecule.extras[ + "canonical_isomeric_explicit_hydrogen_mapped_smiles" + ] inchi_key = molecule.attributes.get("fixed_hydrogen_inchi_key") # Undefined stereochemistry is not expected however there # may be some TK specific edge cases we don't want # exceptions for such as OE and nitrogen stereochemistry. if inchi_key is None: - tmp_mol=Molecule.from_mapped_smiles(cmiles, allow_undefined_stereo=True) - inchi_key=tmp_mol.to_inchikey(fixed_hydrogens=True) + tmp_mol = Molecule.from_mapped_smiles( + cmiles, allow_undefined_stereo=True + ) + inchi_key = tmp_mol.to_inchikey(fixed_hydrogens=True) - br = BasicResult(record_id=record.id, cmiles=cmiles, inchi_key=inchi_key) + br = BasicResult( + record_id=record.id, cmiles=cmiles, inchi_key=inchi_key + ) result_records[client.address][record.id] = br return cls( @@ -358,7 +375,7 @@ def to_records(self) -> List[Tuple[SinglepointRecord, Molecule]]: client = cached_fractal_client(address=client_address) for record in records: - rec = client.get_singlepoints(record.record_id, include=['molecule']) + rec = client.get_singlepoints(record.record_id, include=["molecule"]) # OpenFF molecule molecule: Molecule = Molecule.from_mapped_smiles( @@ -417,26 +434,34 @@ def from_datasets( client = dataset.client # Fetch all entries for use later - dataset.fetch_entries(include=['initial_molecule']) + dataset.fetch_entries(include=["initial_molecule"]) if spec_name not in dataset.specifications: - raise KeyError(f"The {dataset.name} dataset does not contain a '{spec_name}' compute specification") - + raise KeyError( + f"The {dataset.name} dataset does not contain a '{spec_name}' compute specification" + ) - for entry_name, spec_name, record in dataset.iterate_records(specification_names=spec_name, - status=RecordStatusEnum.complete): + for entry_name, spec_name, record in dataset.iterate_records( + specification_names=spec_name, status=RecordStatusEnum.complete + ): entry = dataset.get_entry(entry_name) molecule = entry.initial_molecule - cmiles = entry.attributes["canonical_isomeric_explicit_hydrogen_mapped_smiles"] + cmiles = entry.attributes[ + "canonical_isomeric_explicit_hydrogen_mapped_smiles" + ] inchi_key = molecule.extras.get("fixed_hydrogen_inchi_key") if inchi_key is None: - tmp_mol = Molecule.from_mapped_smiles(cmiles, allow_undefined_stereo=True) - inchi_key=tmp_mol.to_inchikey(fixed_hydrogens=True) + tmp_mol = Molecule.from_mapped_smiles( + cmiles, allow_undefined_stereo=True + ) + inchi_key = tmp_mol.to_inchikey(fixed_hydrogens=True) - opt_rec = OptimizationResult(record_id=record.id, cmiles=cmiles, inchi_key=inchi_key) + opt_rec = OptimizationResult( + record_id=record.id, cmiles=cmiles, inchi_key=inchi_key + ) result_records[client.address][record.id] = opt_rec return cls( @@ -480,7 +505,9 @@ def to_records(self) -> List[Tuple[OptimizationRecord, Molecule]]: client = cached_fractal_client(address=client_address) for record in records: - rec = client.get_optimizations(record.record_id, include=['initial_molecule']) + rec = client.get_optimizations( + record.record_id, include=["initial_molecule"] + ) # OpenFF molecule molecule: Molecule = Molecule.from_mapped_smiles( @@ -488,14 +515,14 @@ def to_records(self) -> List[Tuple[OptimizationRecord, Molecule]]: ) molecule.add_conformer( - numpy.array(rec.initial_molecule.geometry, float).reshape(-1, 3) * unit.bohr + numpy.array(rec.initial_molecule.geometry, float).reshape(-1, 3) + * unit.bohr ) records_and_molecules.append((rec, molecule)) return records_and_molecules - # NOTE: no longer using `driver` here def to_basic_result_collection(self) -> BasicResultCollection: """Returns a basic results collection which references results records which @@ -517,7 +544,9 @@ def to_basic_result_collection(self) -> BasicResultCollection: # will be inefficient at the moment for record, molecule in records_and_molecules: - result_records[record.client.address].append((record.trajectory[-1], molecule)) + result_records[record.client.address].append( + (record.trajectory[-1], molecule) + ) result_entries = defaultdict(list) @@ -525,14 +554,14 @@ def to_basic_result_collection(self) -> BasicResultCollection: for record, molecule in result_records[client_address]: result_entries[client_address].append( - BasicResult( - record_id=record.id, - cmiles=molecule.to_smiles( - isomeric=True, explicit_hydrogens=True, mapped=True - ), - inchi_key=molecule.to_inchikey(fixed_hydrogens=True), - ) + BasicResult( + record_id=record.id, + cmiles=molecule.to_smiles( + isomeric=True, explicit_hydrogens=True, mapped=True + ), + inchi_key=molecule.to_inchikey(fixed_hydrogens=True), ) + ) return BasicResultCollection(entries=result_entries) @@ -579,7 +608,10 @@ def create_basic_dataset( metadata={} if metadata is None else metadata, qc_specifications={"default": QCSpec()} if qc_specifications is None - else {qc_specification.spec_name: qc_specification for qc_specification in qc_specifications}, + else { + qc_specification.spec_name: qc_specification + for qc_specification in qc_specifications + }, ) for records in records_by_cmiles.values(): @@ -645,21 +677,30 @@ def from_datasets( dataset.fetch_entries() if spec_name not in dataset.specifications: - raise KeyError(f"The {dataset.name} dataset does not contain a '{spec_name}' compute specification") + raise KeyError( + f"The {dataset.name} dataset does not contain a '{spec_name}' compute specification" + ) - for entry_name, spec_name, record in dataset.iterate_records(specification_names=spec_name, - status=RecordStatusEnum.complete): + for entry_name, spec_name, record in dataset.iterate_records( + specification_names=spec_name, status=RecordStatusEnum.complete + ): entry = dataset.get_entry(entry_name) - cmiles = entry.attributes["canonical_isomeric_explicit_hydrogen_mapped_smiles"] + cmiles = entry.attributes[ + "canonical_isomeric_explicit_hydrogen_mapped_smiles" + ] inchi_key = entry.attributes.get("fixed_hydrogen_inchi_key") if inchi_key is None: - tmp_mol = Molecule.from_mapped_smiles(cmiles, allow_undefined_stereo=True) + tmp_mol = Molecule.from_mapped_smiles( + cmiles, allow_undefined_stereo=True + ) inchi_key = tmp_mol.to_inchikey(fixed_hydrogens=True) - td_rec = TorsionDriveResult(record_id=record.id, cmiles=cmiles, inchi_key=inchi_key) + td_rec = TorsionDriveResult( + record_id=record.id, cmiles=cmiles, inchi_key=inchi_key + ) result_records[client.address][record.id] = td_rec return cls( @@ -712,7 +753,9 @@ def to_records(self) -> List[Tuple[TorsiondriveRecord, Molecule]]: ) # Map of torsion drive keys to minimum optimization - qc_grid_molecules = [(k, v.final_molecule) for k,v in rec.minimum_optimizations.items()] + qc_grid_molecules = [ + (k, v.final_molecule) for k, v in rec.minimum_optimizations.items() + ] # order the ids so the conformers follow the torsiondrive scan range # x[0] is the torsiondrive key, ie "[90]" @@ -730,7 +773,6 @@ def to_records(self) -> List[Tuple[TorsiondriveRecord, Molecule]]: return records_and_molecules - def create_optimization_dataset( self, dataset_name: str, diff --git a/openff/qcsubmit/tests/results/__init__.py b/openff/qcsubmit/tests/results/__init__.py index dd59d643..71a54d3a 100644 --- a/openff/qcsubmit/tests/results/__init__.py +++ b/openff/qcsubmit/tests/results/__init__.py @@ -7,16 +7,10 @@ from pydantic import BaseModel from qcelemental.models import DriverEnum from qcelemental.models.procedures import TDKeywords - -from qcportal.records import ( - SinglepointRecord, - OptimizationRecord, - TorsiondriveRecord, -) - +from qcportal.records import OptimizationRecord, SinglepointRecord, TorsiondriveRecord +from qcportal.records.models import RecordStatusEnum from qcportal.records.optimization.models import OptimizationSpecification from qcportal.records.singlepoint.models import QCSpecification -from qcportal.records.models import RecordStatusEnum from openff.qcsubmit.results import ( BasicResult, diff --git a/openff/qcsubmit/tests/results/test_caching.py b/openff/qcsubmit/tests/results/test_caching.py index 58262745..3f42bfbe 100644 --- a/openff/qcsubmit/tests/results/test_caching.py +++ b/openff/qcsubmit/tests/results/test_caching.py @@ -4,8 +4,8 @@ import pytest import requests_mock from openff.toolkit.topology import Molecule -from qcportal.records import OptimizationRecord, SinglepointRecord from openff.units import unit +from qcportal.records import OptimizationRecord, SinglepointRecord from openff.qcsubmit.results import BasicResult, OptimizationResult, TorsionDriveResult from openff.qcsubmit.results.caching import ( diff --git a/openff/qcsubmit/tests/results/test_filters.py b/openff/qcsubmit/tests/results/test_filters.py index b9479ac7..b0381653 100644 --- a/openff/qcsubmit/tests/results/test_filters.py +++ b/openff/qcsubmit/tests/results/test_filters.py @@ -6,7 +6,7 @@ from openff.units import unit from pydantic import ValidationError from qcelemental.models import DriverEnum -from qcportal.records import SinglepointRecord, RecordStatusEnum +from qcportal.records import RecordStatusEnum, SinglepointRecord from openff.qcsubmit.results import ( BasicResult, diff --git a/openff/qcsubmit/tests/results/test_results.py b/openff/qcsubmit/tests/results/test_results.py index 6aae364e..2d1c9b24 100644 --- a/openff/qcsubmit/tests/results/test_results.py +++ b/openff/qcsubmit/tests/results/test_results.py @@ -10,22 +10,21 @@ from openff.toolkit.typing.engines.smirnoff import ForceField from openff.units import unit from pydantic import ValidationError +from qcelemental.models import DriverEnum from qcportal import PortalClient from qcportal.molecules import Molecule as QCMolecule from qcportal.records import ( - SinglepointRecord, OptimizationRecord, - TorsiondriveRecord, RecordStatusEnum, + SinglepointRecord, + TorsiondriveRecord, ) -from qcelemental.models import DriverEnum - +from qcportal.records.optimization import OptimizationSpecification +from qcportal.records.singlepoint import QCSpecification from qcportal.records.torsiondrive import ( - TorsiondriveSpecification, TorsiondriveKeywords, + TorsiondriveSpecification, ) -from qcportal.records.optimization import OptimizationSpecification -from qcportal.records.singlepoint import QCSpecification from openff.qcsubmit.common_structures import QCSpec from openff.qcsubmit.exceptions import RecordTypeError