Skip to content

Commit

Permalink
refactor: remove unused code + small cleanup (#432)
Browse files Browse the repository at this point in the history
* Only focused on ga4gh.core + Pydantic models
  • Loading branch information
korikuzma committed Jul 17, 2024
1 parent 29c38ba commit 7479a10
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 94 deletions.
7 changes: 2 additions & 5 deletions src/ga4gh/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@
from .enderef import ga4gh_enref, ga4gh_deref
from .identifiers import (
ga4gh_digest, ga4gh_identify, ga4gh_serialize, is_ga4gh_identifier,
parse_ga4gh_identifier, VrsObjectIdentifierIs, use_ga4gh_compute_identifier_when,
VrsObjectIdentifierIs, use_ga4gh_compute_identifier_when,
CURIE_NAMESPACE, CURIE_SEP, GA4GH_PREFIX_SEP, GA4GH_IR_REGEXP, GA4GH_DIGEST_REGEXP,
PrevVrsVersion
)
from .pydantic import (
is_pydantic_instance, is_curie_type, is_ga4gh_identifiable, is_literal, pydantic_copy
is_pydantic_instance, is_curie_type, pydantic_copy
)
from . import entity_models, domain_models

Expand All @@ -25,7 +25,6 @@
"ga4gh_identify",
"ga4gh_serialize",
"is_ga4gh_identifier",
"parse_ga4gh_identifier",
"VrsObjectIdentifierIs",
"use_ga4gh_compute_identifier_when",
"CURIE_NAMESPACE",
Expand All @@ -36,8 +35,6 @@
"PrevVrsVersion",
"is_pydantic_instance",
"is_curie_type",
"is_ga4gh_identifiable",
"is_literal",
"pydantic_copy",
"entity_models",
"domain_models"
Expand Down
10 changes: 4 additions & 6 deletions src/ga4gh/core/enderef.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,7 @@
from .identifiers import ga4gh_identify, is_ga4gh_identifier
from .pydantic import (
is_pydantic_instance,
is_list,
is_curie_type,
is_ga4gh_identifiable,
get_pydantic_root,
pydantic_copy)

Expand Down Expand Up @@ -45,7 +43,7 @@ def _enref(o):
ref_att_names = cra_map.get(o.type, [])
for ran in ref_att_names:
v = getattr(o, ran)
if is_list(v):
if isinstance(v, list):
setattr(o, ran, [_enref(o2) for o2 in v])
elif isinstance(v, str):
pass
Expand All @@ -60,7 +58,7 @@ def _enref(o):

if not is_pydantic_instance(o):
raise ValueError("Called ga4gh_enref() with non-pydantic instance")
if not is_ga4gh_identifiable(o):
if not o.is_ga4gh_identifiable():
raise ValueError("Called ga4gh_enref() with non-identifiable object")

# in-place replacement on object copy
Expand Down Expand Up @@ -88,7 +86,7 @@ def _deref(o):
ref_att_names = cra_map[o.type]
for ran in ref_att_names:
v = getattr(o, ran)
if is_list(v):
if isinstance(v, list):
setattr(o, ran, [_deref(object_store[str(curie)]) for curie in v])
elif is_ga4gh_identifier(v):
v = get_pydantic_root(v)
Expand All @@ -101,7 +99,7 @@ def _deref(o):

if not is_pydantic_instance(o):
raise ValueError("Called ga4gh_deref() with non-pydantic instance")
if not is_ga4gh_identifiable(o):
if not o.is_ga4gh_identifiable():
raise ValueError("Called ga4gh_deref() with non-identifiable object")

# in-place replacement on object copy
Expand Down
73 changes: 14 additions & 59 deletions src/ga4gh/core/identifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,15 @@
"""

import contextvars
import logging
import re
from contextlib import ContextDecorator
from enum import Enum, IntEnum
from typing import Union, Optional
from pydantic import BaseModel, RootModel
from typing import Optional
from pydantic import BaseModel

from .pydantic import get_pydantic_root
from ga4gh.core.pydantic import get_pydantic_root

__all__ = "ga4gh_digest ga4gh_identify ga4gh_serialize is_ga4gh_identifier parse_ga4gh_identifier".split()

_logger = logging.getLogger(__name__)
__all__ = "ga4gh_digest ga4gh_identify ga4gh_serialize is_ga4gh_identifier".split()

CURIE_NAMESPACE = "ga4gh"
CURIE_SEP = ":"
Expand All @@ -36,7 +33,7 @@
GA4GH_IR_REGEXP = re.compile(r"^ga4gh:(?P<type>[^.]+)\.(?P<digest>[0-9A-Za-z_\-]{32})$")
GA4GH_DIGEST_REGEXP = re.compile(r"^[0-9A-Za-z_\-]{32}$")

ns_w_sep = CURIE_NAMESPACE + CURIE_SEP
NS_W_SEP = f"{CURIE_NAMESPACE}{CURIE_SEP}"


class VrsObjectIdentifierIs(IntEnum):
Expand All @@ -47,9 +44,9 @@ class VrsObjectIdentifierIs(IntEnum):
GA4GH_INVALID - Compute the identifier if it is missing or is present but syntactically invalid
MISSING - Only compute the identifier if missing
The default behavior is safe and ensures that the identifiers are correct,
but at a performance cost. Where the source of inputs to `ga4gh_identify`
are well controlled, for example when annotating a VCF file with VRS IDs,
The default behavior is safe and ensures that the identifiers are correct,
but at a performance cost. Where the source of inputs to `ga4gh_identify`
are well controlled, for example when annotating a VCF file with VRS IDs,
using `MISSING` can improve performance.
"""

Expand Down Expand Up @@ -113,27 +110,7 @@ def is_ga4gh_identifier(ir):
False
"""
return str(get_pydantic_root(ir)).startswith(ns_w_sep)


def parse_ga4gh_identifier(ir):
"""
Parses a GA4GH identifier, returning a dict with type and digest components
>>> parse_ga4gh_identifier("ga4gh:SQ.0123abcd")
{'type': 'SQ', 'digest': '0123abcd'}
>>> parse_ga4gh_identifier("notga4gh:SQ.0123abcd")
Traceback (most recent call last):
...
ValueError: notga4gh:SQ.0123abcd
"""

try:
return GA4GH_IR_REGEXP.match(str(ir)).groupdict()
except AttributeError as e:
raise ValueError(ir) from e
return str(get_pydantic_root(ir)).startswith(NS_W_SEP)


def ga4gh_identify(vro, in_place: str = 'default', as_version: PrevVrsVersion | None = None) -> str | None:
Expand Down Expand Up @@ -171,7 +148,7 @@ def ga4gh_identify(vro, in_place: str = 'default', as_version: PrevVrsVersion |
obj_id = getattr(vro, "id", None)
if when_rule == VrsObjectIdentifierIs.MISSING:
do_compute = obj_id is None or obj_id == ""
else: # GA4GHComputeIdentifierIs.GA4GH_INVALID
else: # VrsObjectIdentifierIs.GA4GH_INVALID
do_compute = not vro.has_valid_ga4gh_id()

if do_compute:
Expand All @@ -182,9 +159,11 @@ def ga4gh_identify(vro, in_place: str = 'default', as_version: PrevVrsVersion |
return None


def ga4gh_digest(vro: BaseModel, overwrite: bool = False, as_version: PrevVrsVersion | None = None) -> str:
def ga4gh_digest(vro: BaseModel, overwrite: bool = False, as_version: PrevVrsVersion | None = None) -> str | None:
"""Return the GA4GH digest for the object.
Only GA4GH identifiable objects are GA4GH digestible.
If ``as_version`` is provided, other parameters are ignored and a digest is returned
following the conventions of the VRS version indicated by ``as_version``.
Raises ``ValueError`` if ``as_version`` is not a ``PrevVrsVersion``.
Expand All @@ -197,7 +176,7 @@ def ga4gh_digest(vro: BaseModel, overwrite: bool = False, as_version: PrevVrsVer
"""
PrevVrsVersion.validate(as_version)

if vro.is_ga4gh_identifiable(): # Only GA4GH identifiable objects are GA4GH digestible
if vro.is_ga4gh_identifiable():
if as_version is None:
return vro.get_or_create_digest(overwrite)
else:
Expand All @@ -206,30 +185,6 @@ def ga4gh_digest(vro: BaseModel, overwrite: bool = False, as_version: PrevVrsVer
return None


def replace_with_digest(val: dict) -> Union[str, dict]:
"""
If val has a digest computed, return it, else return val
"""
if isinstance(val, dict) and val.get("digest", None) is not None:
return val["digest"]
return val


def collapse_identifiable_values(obj: dict) -> dict:
"""
Replaces dict values with their digests if they are defined.
Does not collapse the top level object, only objects it contains.
"""
if isinstance(obj, dict):
obj = {
k: replace_with_digest(collapse_identifiable_values(obj[k]))
for k in obj.keys()
}
elif isinstance(obj, list) or isinstance(obj, set):
obj = [replace_with_digest(collapse_identifiable_values(elem)) for elem in obj]
return obj


def ga4gh_serialize(obj: BaseModel, as_version: PrevVrsVersion | None = None) -> Optional[bytes]:
"""Serializes an object for use in computed digest computation.
Expand Down
18 changes: 0 additions & 18 deletions src/ga4gh/core/pydantic.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,24 +21,6 @@ def getattr_in(obj, names) -> Any:
return v


def is_ga4gh_identifiable(o: Any) -> bool:
"""
Determine if object is a GA4GH identifiable type.
:param o: Object
:return: `True` if `o` is a GA4GH Identifiable Object. `False` otherwise.
"""
return o.is_ga4gh_identifiable()


def is_literal(o: Any) -> bool:
return isinstance(o, (str, int, float, complex, bool))


def is_list(o: Any) -> bool:
return isinstance(o, list)


def is_curie_type(o: Any) -> bool:
"""
Returns true if the object is a str-like matching the CURIE pattern.
Expand Down
7 changes: 1 addition & 6 deletions src/ga4gh/vrs/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
from pydantic import BaseModel, Field, RootModel, StringConstraints, model_serializer

from ga4gh.core.pydantic import (
is_ga4gh_identifiable,
getattr_in
)
from ga4gh.core.entity_models import IRI, Expression, DomainEntity
Expand All @@ -44,7 +43,6 @@ def is_coll(thing):
Return True if the thing looks like a collection.
This is not exhaustive, do not use in general.
"""
# return hasattr(thing, '__iter__') and not isinstance(thing, str) and not inspect.isclass(thing)
return type(thing) in [list, set]
if is_coll(vals):
for x in vals:
Expand Down Expand Up @@ -94,7 +92,7 @@ def pydantic_class_refatt_map():
# Types directly reffable
reffable_classes = list(filter(
lambda c: ('id' in c.model_fields
and is_ga4gh_identifiable(c)),
and c.is_ga4gh_identifiable()),
model_classes
))
# Types reffable because they are a union of reffable types
Expand Down Expand Up @@ -235,9 +233,6 @@ def is_ga4gh_identifiable():
def has_valid_ga4gh_id(self):
return self.id and GA4GH_IR_REGEXP.match(self.id) is not None

def has_valid_digest(self):
return bool(self.digest) # Pydantic constraint ensures digest field value is valid

def compute_digest(self, store=True, as_version: PrevVrsVersion | None = None) -> str:
"""A sha512t24u digest created using the VRS Computed Identifier algorithm.
Expand Down

0 comments on commit 7479a10

Please sign in to comment.