Skip to content

Commit

Permalink
Merge branch 'main' into issue-433
Browse files Browse the repository at this point in the history
  • Loading branch information
korikuzma committed Jul 17, 2024
2 parents ec9fe4a + 7479a10 commit d4934cf
Show file tree
Hide file tree
Showing 7 changed files with 39 additions and 110 deletions.
7 changes: 2 additions & 5 deletions src/ga4gh/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@
from .enderef import ga4gh_enref, ga4gh_deref
from .identifiers import (
ga4gh_digest, ga4gh_identify, ga4gh_serialize, is_ga4gh_identifier,
parse_ga4gh_identifier, VrsObjectIdentifierIs, use_ga4gh_compute_identifier_when,
VrsObjectIdentifierIs, use_ga4gh_compute_identifier_when,
CURIE_NAMESPACE, CURIE_SEP, GA4GH_PREFIX_SEP, GA4GH_IR_REGEXP, GA4GH_DIGEST_REGEXP,
PrevVrsVersion
)
from .pydantic import (
is_pydantic_instance, is_curie_type, is_ga4gh_identifiable, is_literal, pydantic_copy
is_pydantic_instance, is_curie_type, pydantic_copy
)
from . import entity_models, domain_models

Expand All @@ -25,7 +25,6 @@
"ga4gh_identify",
"ga4gh_serialize",
"is_ga4gh_identifier",
"parse_ga4gh_identifier",
"VrsObjectIdentifierIs",
"use_ga4gh_compute_identifier_when",
"CURIE_NAMESPACE",
Expand All @@ -36,8 +35,6 @@
"PrevVrsVersion",
"is_pydantic_instance",
"is_curie_type",
"is_ga4gh_identifiable",
"is_literal",
"pydantic_copy",
"entity_models",
"domain_models"
Expand Down
18 changes: 9 additions & 9 deletions src/ga4gh/core/domain_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from pydantic import Field, RootModel

from ga4gh.core.entity_models import _DomainEntity
from ga4gh.core.entity_models import DomainEntity


class CommonDomainType(str, Enum):
Expand All @@ -30,7 +30,7 @@ class CommonDomainType(str, Enum):
TR_COMB = "CombinationTherapy"
GENE = "Gene"

class Phenotype(_DomainEntity):
class Phenotype(DomainEntity):
"""An observable characteristic or trait of an organism."""

type: Literal[CommonDomainType.PHENOTYPE] = Field(
Expand All @@ -39,7 +39,7 @@ class Phenotype(_DomainEntity):
)


class Disease(_DomainEntity):
class Disease(DomainEntity):
"""A particular abnormal condition that negatively affects the structure or function
of all or part of an organism and is not immediately due to any external injury.
"""
Expand All @@ -50,7 +50,7 @@ class Disease(_DomainEntity):
)


class TraitSet(_DomainEntity):
class TraitSet(DomainEntity):
"""A set of phenotype and/or disease concepts that together constitute a condition."""

type: Literal[CommonDomainType.TRAIT_SET] = Field(
Expand All @@ -73,7 +73,7 @@ class Condition(RootModel):
)


class TherapeuticAction(_DomainEntity):
class TherapeuticAction(DomainEntity):
"""A therapeutic action taken that is intended to alter or stop a pathologic process."""

type: Literal[CommonDomainType.TR_ACTION] = Field(
Expand All @@ -82,7 +82,7 @@ class TherapeuticAction(_DomainEntity):
)


class TherapeuticAgent(_DomainEntity):
class TherapeuticAgent(DomainEntity):
"""An administered therapeutic agent that is intended to alter or stop a pathologic process."""

type: Literal[CommonDomainType.TR_AGENT] = Field(
Expand All @@ -91,7 +91,7 @@ class TherapeuticAgent(_DomainEntity):
)


class TherapeuticSubstituteGroup(_DomainEntity):
class TherapeuticSubstituteGroup(DomainEntity):
"""A group of therapeutic procedures that may be treated as substitutes for one another."""

type: Literal[CommonDomainType.TR_SUB] = Field(
Expand All @@ -105,7 +105,7 @@ class TherapeuticSubstituteGroup(_DomainEntity):
)


class CombinationTherapy(_DomainEntity):
class CombinationTherapy(DomainEntity):
"""A therapeutic procedure that involves multiple different therapeutic procedures
performed in combination.
"""
Expand Down Expand Up @@ -133,7 +133,7 @@ class TherapeuticProcedure(RootModel):
)


class Gene(_DomainEntity):
class Gene(DomainEntity):
"""A basic physical and functional unit of heredity."""

type: Literal[CommonDomainType.GENE] = Field(
Expand Down
10 changes: 4 additions & 6 deletions src/ga4gh/core/enderef.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,7 @@
from .identifiers import ga4gh_identify, is_ga4gh_identifier
from .pydantic import (
is_pydantic_instance,
is_list,
is_curie_type,
is_ga4gh_identifiable,
get_pydantic_root,
pydantic_copy)

Expand Down Expand Up @@ -45,7 +43,7 @@ def _enref(o):
ref_att_names = cra_map.get(o.type, [])
for ran in ref_att_names:
v = getattr(o, ran)
if is_list(v):
if isinstance(v, list):
setattr(o, ran, [_enref(o2) for o2 in v])
elif isinstance(v, str):
pass
Expand All @@ -60,7 +58,7 @@ def _enref(o):

if not is_pydantic_instance(o):
raise ValueError("Called ga4gh_enref() with non-pydantic instance")
if not is_ga4gh_identifiable(o):
if not o.is_ga4gh_identifiable():
raise ValueError("Called ga4gh_enref() with non-identifiable object")

# in-place replacement on object copy
Expand Down Expand Up @@ -88,7 +86,7 @@ def _deref(o):
ref_att_names = cra_map[o.type]
for ran in ref_att_names:
v = getattr(o, ran)
if is_list(v):
if isinstance(v, list):
setattr(o, ran, [_deref(object_store[str(curie)]) for curie in v])
elif is_ga4gh_identifier(v):
v = get_pydantic_root(v)
Expand All @@ -101,7 +99,7 @@ def _deref(o):

if not is_pydantic_instance(o):
raise ValueError("Called ga4gh_deref() with non-pydantic instance")
if not is_ga4gh_identifiable(o):
if not o.is_ga4gh_identifiable():
raise ValueError("Called ga4gh_deref() with non-identifiable object")

# in-place replacement on object copy
Expand Down
5 changes: 3 additions & 2 deletions src/ga4gh/core/entity_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
* `import ga4gh.core`, and refer to models using the fully-qualified
module name, e.g., `ga4gh.core.entity_models.Coding`
"""
from abc import ABC
from typing import Any, Dict, Annotated, Optional, Union, List
from enum import Enum

Expand Down Expand Up @@ -153,7 +154,7 @@ class Expression(BaseModel):
#########################################


class _Entity(BaseModel):
class Entity(ABC, BaseModel):
"""Entity is the root class of the 'gks-common' core information model classes -
those that have identifiers and other general metadata like labels, xrefs, urls,
descriptions, etc. All common classes descend from and inherit its attributes.
Expand Down Expand Up @@ -187,7 +188,7 @@ def validate_type(cls, v: str | Enum) -> str:
return v


class _DomainEntity(_Entity):
class DomainEntity(Entity, ABC):
"""An Entity that is specific to a particular biomedical domain such as disease,
therapeutics, or genes. Domain Entities are considered as 'concept-level' entities,
as opposed to particular instances. e.g. 'Lung Cancer', not 'patient123's lung
Expand Down
73 changes: 14 additions & 59 deletions src/ga4gh/core/identifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,15 @@
"""

import contextvars
import logging
import re
from contextlib import ContextDecorator
from enum import Enum, IntEnum
from typing import Union, Optional
from pydantic import BaseModel, RootModel
from typing import Optional
from pydantic import BaseModel

from .pydantic import get_pydantic_root
from ga4gh.core.pydantic import get_pydantic_root

__all__ = "ga4gh_digest ga4gh_identify ga4gh_serialize is_ga4gh_identifier parse_ga4gh_identifier".split()

_logger = logging.getLogger(__name__)
__all__ = "ga4gh_digest ga4gh_identify ga4gh_serialize is_ga4gh_identifier".split()

CURIE_NAMESPACE = "ga4gh"
CURIE_SEP = ":"
Expand All @@ -36,7 +33,7 @@
GA4GH_IR_REGEXP = re.compile(r"^ga4gh:(?P<type>[^.]+)\.(?P<digest>[0-9A-Za-z_\-]{32})$")
GA4GH_DIGEST_REGEXP = re.compile(r"^[0-9A-Za-z_\-]{32}$")

ns_w_sep = CURIE_NAMESPACE + CURIE_SEP
NS_W_SEP = f"{CURIE_NAMESPACE}{CURIE_SEP}"


class VrsObjectIdentifierIs(IntEnum):
Expand All @@ -47,9 +44,9 @@ class VrsObjectIdentifierIs(IntEnum):
GA4GH_INVALID - Compute the identifier if it is missing or is present but syntactically invalid
MISSING - Only compute the identifier if missing
The default behavior is safe and ensures that the identifiers are correct,
but at a performance cost. Where the source of inputs to `ga4gh_identify`
are well controlled, for example when annotating a VCF file with VRS IDs,
The default behavior is safe and ensures that the identifiers are correct,
but at a performance cost. Where the source of inputs to `ga4gh_identify`
are well controlled, for example when annotating a VCF file with VRS IDs,
using `MISSING` can improve performance.
"""

Expand Down Expand Up @@ -113,27 +110,7 @@ def is_ga4gh_identifier(ir):
False
"""
return str(get_pydantic_root(ir)).startswith(ns_w_sep)


def parse_ga4gh_identifier(ir) -> dict:
    """Parse a GA4GH identifier into its type and digest components.

    The identifier is converted with ``str()`` and matched against
    ``GA4GH_IR_REGEXP``, which requires the ``ga4gh:`` namespace, a type
    prefix, a ``.`` separator and a 32-character digest.

    >>> parse_ga4gh_identifier("ga4gh:SQ.0123abcd0123abcd0123abcd0123abcd")
    {'type': 'SQ', 'digest': '0123abcd0123abcd0123abcd0123abcd'}

    >>> parse_ga4gh_identifier("notga4gh:SQ.0123abcd")
    Traceback (most recent call last):
    ...
    ValueError: notga4gh:SQ.0123abcd

    :param ir: identifier to parse; its ``str()`` form is what gets matched
    :return: dict with the ``type`` and ``digest`` named groups of the match
    :raises ValueError: if ``ir`` does not match ``GA4GH_IR_REGEXP``
    """
    match = GA4GH_IR_REGEXP.match(str(ir))
    # Test for "no match" explicitly instead of catching the AttributeError
    # raised by calling ``.groupdict()`` on ``None``.
    if match is None:
        raise ValueError(ir)
    return match.groupdict()
return str(get_pydantic_root(ir)).startswith(NS_W_SEP)


def ga4gh_identify(vro, in_place: str = 'default', as_version: PrevVrsVersion | None = None) -> str | None:
Expand Down Expand Up @@ -171,7 +148,7 @@ def ga4gh_identify(vro, in_place: str = 'default', as_version: PrevVrsVersion |
obj_id = getattr(vro, "id", None)
if when_rule == VrsObjectIdentifierIs.MISSING:
do_compute = obj_id is None or obj_id == ""
else: # GA4GHComputeIdentifierIs.GA4GH_INVALID
else: # VrsObjectIdentifierIs.GA4GH_INVALID
do_compute = not vro.has_valid_ga4gh_id()

if do_compute:
Expand All @@ -182,9 +159,11 @@ def ga4gh_identify(vro, in_place: str = 'default', as_version: PrevVrsVersion |
return None


def ga4gh_digest(vro: BaseModel, overwrite: bool = False, as_version: PrevVrsVersion | None = None) -> str:
def ga4gh_digest(vro: BaseModel, overwrite: bool = False, as_version: PrevVrsVersion | None = None) -> str | None:
"""Return the GA4GH digest for the object.
Only GA4GH identifiable objects are GA4GH digestible.
If ``as_version`` is provided, other parameters are ignored and a digest is returned
following the conventions of the VRS version indicated by ``as_version``.
Raises ``ValueError`` if ``as_version`` is not a ``PrevVrsVersion``.
Expand All @@ -197,7 +176,7 @@ def ga4gh_digest(vro: BaseModel, overwrite: bool = False, as_version: PrevVrsVer
"""
PrevVrsVersion.validate(as_version)

if vro.is_ga4gh_identifiable(): # Only GA4GH identifiable objects are GA4GH digestible
if vro.is_ga4gh_identifiable():
if as_version is None:
return vro.get_or_create_digest(overwrite)
else:
Expand All @@ -206,30 +185,6 @@ def ga4gh_digest(vro: BaseModel, overwrite: bool = False, as_version: PrevVrsVer
return None


def replace_with_digest(val: dict) -> Union[str, dict]:
    """Return the digest of ``val`` if it has one, otherwise ``val`` itself.

    :param val: value to inspect; only a ``dict`` can carry a digest, any
        other value is passed through untouched
    :return: ``val["digest"]`` when ``val`` is a dict whose ``"digest"`` entry
        is present and not ``None``; otherwise ``val`` unchanged
    """
    if isinstance(val, dict) and val.get("digest") is not None:
        return val["digest"]
    return val


def collapse_identifiable_values(obj: dict) -> dict:
    """Replace nested values with their digests where a digest is defined.

    The structure is walked recursively. The top-level object itself is never
    collapsed, only the objects it contains.

    :param obj: structure to process; dicts, lists and sets are traversed,
        any other value is returned as-is
    :return: a new dict (for dict input) or a new list (for list or set
        input, so sets come back as lists) whose members have been collapsed
        and, where they carry a digest, replaced by it
    """
    if isinstance(obj, dict):
        return {
            key: replace_with_digest(collapse_identifiable_values(value))
            for key, value in obj.items()
        }
    if isinstance(obj, (list, set)):
        return [
            replace_with_digest(collapse_identifiable_values(elem))
            for elem in obj
        ]
    return obj


def ga4gh_serialize(obj: BaseModel, as_version: PrevVrsVersion | None = None) -> Optional[bytes]:
"""Serializes an object for use in computed digest computation.
Expand Down
18 changes: 0 additions & 18 deletions src/ga4gh/core/pydantic.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,24 +21,6 @@ def getattr_in(obj, names) -> Any:
return v


def is_ga4gh_identifiable(o: Any) -> bool:
    """Determine if object is a GA4GH identifiable type.

    This is a thin wrapper that delegates to the object's own
    ``is_ga4gh_identifiable()`` method, so ``o`` must provide one.

    :param o: Object
    :return: `True` if `o` is a GA4GH Identifiable Object. `False` otherwise.
    """
    return o.is_ga4gh_identifiable()


def is_literal(o: Any) -> bool:
    """Return whether ``o`` is an instance of a scalar literal type.

    :param o: Object
    :return: `True` if `o` is a ``str``, ``int``, ``float``, ``complex`` or
        ``bool`` instance. `False` otherwise.
    """
    return isinstance(o, (str, int, float, complex, bool))


def is_list(o: Any) -> bool:
    """Return whether ``o`` is a ``list`` instance.

    :param o: Object
    :return: `True` if `o` is a ``list`` (or an instance of a ``list``
        subclass). `False` otherwise.
    """
    return isinstance(o, list)


def is_curie_type(o: Any) -> bool:
"""
Returns true if the object is a str-like matching the CURIE pattern.
Expand Down
Loading

0 comments on commit d4934cf

Please sign in to comment.