diff --git a/notebooks/Tutorial.ipynb b/notebooks/Tutorial.ipynb index 30ad283..1a1ad3f 100644 --- a/notebooks/Tutorial.ipynb +++ b/notebooks/Tutorial.ipynb @@ -424,6 +424,8 @@ "\n", "The toolkit provides functions for performing multiple useful sanity checks.\n", "\n", + "The validators validate a sequence of `Identified` (a thing that has a `TermId` identifier) or `TermId`s.\n", + "\n", "## Obsolete term IDs\n", "\n", "We should always use the primary term IDs instead of the obsolete terms.\n", @@ -448,7 +450,7 @@ "\n", "# The term uses an obsolete term ID `HP:0006010` instead of the current `HP:0100807`.\n", "inputs = [\n", - " MinimalTerm.create_minimal_term(TermId.from_curie('HP:0006010'), name='Long fingers', alt_term_ids=[], is_obsolete=False)\n", + " TermId.from_curie('HP:0006010')\n", "]\n", "results = obso_validator.validate(inputs)\n", "\n", diff --git a/src/hpotk/validate/_hpo.py b/src/hpotk/validate/_hpo.py index f15b55f..fa9379b 100644 --- a/src/hpotk/validate/_hpo.py +++ b/src/hpotk/validate/_hpo.py @@ -12,6 +12,9 @@ class BaseOntologyRuleValidator(RuleValidator, metaclass=abc.ABCMeta): def __init__(self, ontology: MinimalOntology): + if not isinstance(ontology, MinimalOntology): + raise ValueError(f'ontology must be an instance of hpotk.ontology.MinimalOntology ' + f'but it was an instance of {type(ontology)}') self._ontology = ontology def _primary_term_id(self, identifier: TermId) -> typing.Optional[TermId]: @@ -32,8 +35,8 @@ class AnnotationPropagationValidator(BaseOntologyRuleValidator): def __init__(self, ontology: MinimalOntology): super().__init__(ontology) - def validate(self, items: typing.Sequence[Identified]) -> ValidationResults: - term_ids = {self._primary_term_id(term.identifier) for term in items} + def validate(self, items: typing.Sequence[typing.Union[Identified, TermId]]) -> ValidationResults: + term_ids = {self._primary_term_id(self.extract_term_id(item)) for item in items} results = [] for term_id in term_ids: for ancestor in get_ancestors(self._ontology, source=term_id, include_source=False): @@ -62,18 +65,19 @@ class PhenotypicAbnormalityValidator(BaseOntologyRuleValidator): def __init__(self, ontology: MinimalOntology): super().__init__(ontology) - def validate(self, items: typing.Sequence[Identified]) -> ValidationResults: + def validate(self, items: typing.Sequence[typing.Union[Identified, TermId]]) -> ValidationResults: results = [] - for term in items: - term_id = self._primary_term_id(term.identifier) + for item in items: + term_id = self.extract_term_id(item) + term_id = self._primary_term_id(term_id) ancestors = get_ancestors(self._ontology, source=term_id, include_source=False) if PHENOTYPIC_ABNORMALITY not in ancestors: - term = self._ontology.get_term(term_id) + item = self._ontology.get_term(term_id) results.append( ValidationResult( level=ValidationLevel.ERROR, category='phenotypic_abnormality_descendant', - message=f'{term.name} [{term.identifier.value}] ' + message=f'{item.name} [{item.identifier.value}] ' f'is not a descendant of Phenotypic abnormality [{PHENOTYPIC_ABNORMALITY.value}]' ) ) @@ -86,17 +90,18 @@ class ObsoleteTermIdsValidator(BaseOntologyRuleValidator): def __init__(self, ontology: MinimalOntology): super().__init__(ontology) - def validate(self, items: typing.Sequence[Identified]) -> ValidationResults: + def validate(self, items: typing.Sequence[typing.Union[Identified, TermId]]) -> ValidationResults: results = [] - for term in items: - current_id = self._primary_term_id(term.identifier) - if current_id != term.identifier: - current_term = self._ontology.get_term(term.identifier) + for item in items: + term_id = self.extract_term_id(item) + current_id = self._primary_term_id(term_id) + if current_id != term_id: + current_term = self._ontology.get_term(current_id) results.append( ValidationResult( level=ValidationLevel.WARNING, category='obsolete_term_id_is_used', - message=f'Using the obsolete {term.identifier.value} instead of {current_id.value} ' + message=f'Using the obsolete {term_id} instead of {current_id.value} ' f'for {current_term.name}' ) ) diff --git a/src/hpotk/validate/_model.py b/src/hpotk/validate/_model.py index 7643384..504a01d 100644 --- a/src/hpotk/validate/_model.py +++ b/src/hpotk/validate/_model.py @@ -4,7 +4,7 @@ from collections import namedtuple -from hpotk.model import Identified +from hpotk.model import Identified, TermId class ValidationLevel(enum.Enum): @@ -39,14 +39,28 @@ def __repr__(self) -> str: class RuleValidator(metaclass=abc.ABCMeta): """ - `RuleValidator` checks if a sequence of `Identified` items meet the validation requirements. - The issues are returned as `ValidationResults`. + `RuleValidator` checks if a sequence of :class:`Identified` or :class:`TermId` instances meet + the validation requirements. + + The validators can check each item individually or as a collection, for instance, + to discover violation of the annotation propagation rule, etc. + + The issues are returned as :class:`ValidationResults`. """ @abc.abstractmethod - def validate(self, items: typing.Sequence[Identified]) -> ValidationResults: + def validate(self, items: typing.Sequence[typing.Union[Identified, TermId]]) -> ValidationResults: pass + @staticmethod + def extract_term_id(item: typing.Union[Identified, TermId]) -> TermId: + if isinstance(item, Identified): + return item.identifier + elif isinstance(item, TermId): + return item + else: + raise ValueError(f'Item {item} of type {type(item)} is not a TermId nor extends Identified') + class ValidationRunner: """ @@ -56,7 +70,7 @@ class ValidationRunner: def __init__(self, validators: typing.Sequence[RuleValidator]): self._validators = validators - def validate_all(self, items: typing.Sequence[Identified]) -> ValidationResults: + def validate_all(self, items: typing.Sequence[typing.Union[Identified, TermId]]) -> ValidationResults: overall = [] for validator in self._validators: results = validator.validate(items) diff --git a/tests/algorithm/test_similarity.py b/tests/algorithm/test_similarity.py index 197535a..c4a7dc8 100644 --- a/tests/algorithm/test_similarity.py +++ b/tests/algorithm/test_similarity.py @@ -12,7 +12,9 @@ from hpotk.ontology.load.obographs import load_minimal_ontology TOY_HPO = resource_filename(__name__, os.path.join('../data', 'hp.small.json')) +# TOY_HPO = '/home/ielis/data/ontologies/hpo/2023-04-05/hp.2023-04-05.json' TOY_HPOA = resource_filename(__name__, os.path.join('../data', 'phenotype.real-shortlist.hpoa')) +# TOY_HPOA = '/home/ielis/data/hpoa/phenotype.2023-04-05.hpoa' class TestResnik(unittest.TestCase): @@ -25,6 +27,11 @@ def setUpClass(cls) -> None: hpoa_loader = SimpleHpoaDiseaseLoader(cls.HPO) cls.DISEASES: HpoDiseases = hpoa_loader.load(TOY_HPOA) + @unittest.skip + def test_precalculate_and_store(self): + mica = calculate_ic_for_annotated_items(self.DISEASES, self.HPO) + mica.to_csv('ic.csv.gz') + def test_calculate_ic_for_hpo_diseases(self): mica = calculate_ic_for_annotated_items(self.DISEASES, self.HPO)