Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
korikuzma committed Jul 10, 2023
1 parent 8b458b1 commit b6b5888
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 202 deletions.
11 changes: 7 additions & 4 deletions src/ga4gh/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,13 @@
from ._internal.digests import sha512t24u
from ._internal.enderef import ga4gh_enref, ga4gh_deref
from ._internal.exceptions import GA4GHError
from ._internal.identifiers import (ga4gh_digest, ga4gh_identify, ga4gh_serialize, is_ga4gh_identifier,
parse_ga4gh_identifier)
from ._internal.jsonschema import (build_models, build_class_referable_attribute_map, is_pjs_class, is_pjs_instance,
is_curie, is_identifiable, is_literal, pjs_copy)
from ._internal.identifiers import (
ga4gh_digest, ga4gh_identify, ga4gh_serialize, is_ga4gh_identifier,
parse_ga4gh_identifier
)
from ._internal.pydantic import (
is_pydantic_instance, is_curie, is_identifiable, is_literal, pjs_copy
)

try:
__version__ = get_distribution(__name__).version
Expand Down
13 changes: 4 additions & 9 deletions src/ga4gh/core/_internal/enderef.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,7 @@
"""

import logging

from .identifiers import ga4gh_identify, is_ga4gh_identifier
from .jsonschema import is_array, is_curie, is_identifiable, is_pjs_instance, pjs_copy

_logger = logging.getLogger(__name__)


def ga4gh_enref(o, cra_map, object_store=None):
Expand All @@ -40,7 +35,7 @@ def _enref(o):
ref_att_names = cra_map.get(o.type, [])
for ran in ref_att_names:
v = o[ran]
if is_array(v):
if is_list(v):
o[ran] = [_enref(o2) for o2 in v]
elif isinstance(v, str):
pass
Expand All @@ -53,7 +48,7 @@ def _enref(o):

return _id_and_store(o)

if not is_pjs_instance(o):
if not is_pydantic_instance(o):
raise ValueError("Called ga4gh_enref() with non-python_jsonschema_object instance")
if not is_identifiable(o):
raise ValueError("Called ga4gh_enref() with non-identifiable object")
Expand Down Expand Up @@ -82,7 +77,7 @@ def _deref(o):
ref_att_names = cra_map[o.type]
for ran in ref_att_names:
v = o[ran]
if is_array(v):
if is_list(v):
o[ran] = [_deref(object_store[str(curie)]) for curie in v]
elif is_ga4gh_identifier(v):
o[ran] = _deref(object_store[str(v)])
Expand All @@ -91,7 +86,7 @@ def _deref(o):

return o

if not is_pjs_instance(o):
if not is_pydantic_instance(o):
raise ValueError("Called ga4gh_deref() with non-python_jsonschema_object instance")
if not is_identifiable(o):
raise ValueError("Called ga4gh_deref() with non-identifiable object")
Expand Down
37 changes: 6 additions & 31 deletions src/ga4gh/core/_internal/identifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,32 +14,15 @@
For that reason, they are implemented here in one file.
"""

import logging
import os
import re

from canonicaljson import encode_canonical_json
import pkg_resources
import yaml

from .digests import sha512t24u
from .jsonschema import is_array, is_pjs_instance, is_curie_type, is_identifiable, is_literal
from .pydantic import is_list, is_pydantic_instance, is_curie_type, is_identifiable, is_literal

__all__ = "ga4gh_digest ga4gh_identify ga4gh_serialize is_ga4gh_identifier parse_ga4gh_identifier".split()

_logger = logging.getLogger(__name__)

# Assume that ga4gh.yaml and vrs.yaml files are in the same directory for now
schema_dir = os.environ.get("VRSATILE_SCHEMA_DIR", pkg_resources.resource_filename(__name__, "data/schemas/vrsatile"))
cfg = yaml.safe_load(open(schema_dir + "/merged.yaml"))
defs = cfg["definitions"]

type_prefix_map_default = dict()
for k,v in defs.items():
if "ga4gh_prefix" in v:
type_prefix_map_default[k] = v["ga4gh_prefix"]

namespace = "ga4gh"
curie_sep = ":"
ref_sep = "."
Expand Down Expand Up @@ -96,15 +79,8 @@ def ga4gh_identify(vro, type_prefix_map=None):
'ga4gh:VSL.u5fspwVbQ79QkX6GHLF8tXPCAXFJqRPx'
"""

if type_prefix_map is None:
type_prefix_map = type_prefix_map_default
try:
pfx = type_prefix_map[vro.type]
except KeyError:
_logger.debug("No identifier prefix is defined for %s; check ga4gh.yaml", vro.type)
return None
digest = ga4gh_digest(vro)
pfx = vro.prefix
ir = f"{namespace}{curie_sep}{pfx}{ref_sep}{digest}"
return ir

Expand Down Expand Up @@ -173,17 +149,16 @@ def dictify(vro, enref=True):
v = v.split(ref_sep, 1)[1]
return v

if is_pjs_instance(vro):
if is_pydantic_instance(vro):
if is_identifiable(vro) and enref:
return ga4gh_digest(vro)

d = {k: dictify(vro[k], enref=True)
for k in vro
if not (k.startswith("_") or
(k == "id" and vro.type in type_prefix_map_default) or
vro[k] is None)}
if k in vro.ga4gh_digest_keys}
return d

if is_array(vro):
if is_list(vro):
if is_curie_type(vro[0]):
return sorted(dictify(o) for o in vro.data)
return sorted([dictify(o) for o in vro.typed_elems])
Expand Down
154 changes: 0 additions & 154 deletions src/ga4gh/core/_internal/jsonschema.py

This file was deleted.

25 changes: 25 additions & 0 deletions src/ga4gh/core/_internal/pydantic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
def is_identifiable(o: object) -> bool:
    """Determine if an object is identifiable.

    An object is considered identifiable if it has a `ga4gh_digest_keys`
    attribute.

    :param o: Object to inspect
    :return: `True` if `o` has a `ga4gh_digest_keys` attribute, `False` otherwise.
    """
    # NOTE: the parameter is annotated `object` rather than the builtin
    # function `any`, which is not a type.
    return hasattr(o, "ga4gh_digest_keys")


def is_literal(o: object) -> bool:
    """Determine if an object is a literal scalar value.

    :param o: Object to inspect
    :return: `True` if `o` is an instance of `str`, `int`, `float`, `complex`
        or `bool`. `False` otherwise.
    """
    # `bool` is a subclass of `int`, so listing it is redundant for
    # `isinstance`; it is kept to make the accepted types explicit.
    return isinstance(o, (str, int, float, complex, bool))


def is_list(o: object) -> bool:
    """Determine if an object is a list.

    :param o: Object to inspect
    :return: `True` if `o` is a `list` (or an instance of a `list` subclass).
        `False` otherwise.
    """
    return isinstance(o, list)


def is_curie_type(o: object) -> bool:
    """Determine if an object is a CURIE-typed value.

    Not implemented yet: this placeholder reports `False` for every input.

    :param o: Object to inspect
    :return: Always `False` until the real check is implemented.
    """
    # TODO: implement, presumably as `isinstance(o, CURIE)` once a CURIE model
    # is available to import here (confirm against the pydantic models).
    # An explicit `False` honours the declared `bool` return type; the previous
    # bare `pass` returned `None`, which callers treat the same in boolean
    # context.
    return False

def is_pydantic_instance(o: object) -> bool:
    """Determine if an object is a pydantic model instance.

    Not implemented yet: this placeholder reports `False` for every input.

    :param o: Object to inspect
    :return: Always `False` until the real check is implemented.
    """
    # TODO: implement, presumably as `isinstance(o, BaseModel)` once pydantic
    # is imported in this module (confirm the intended base class).
    # An explicit `False` honours the declared `bool` return type; the previous
    # bare `pass` returned `None`, which callers treat the same in boolean
    # context.
    return False
4 changes: 2 additions & 2 deletions src/ga4gh/vrs/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import logging

from bioutils.normalize import normalize as _normalize, NormalizationMode
from ga4gh.core import is_pjs_instance, pjs_copy, ga4gh_digest
from ga4gh.core import is_pydantic_instance, pjs_copy, ga4gh_digest

from ._internal import models
from .dataproxy import SequenceProxy
Expand Down Expand Up @@ -71,7 +71,7 @@ def _normalize_variationset(o, data_proxy=None):
def normalize(vo, data_proxy=None):
"""normalize given vrs object, regardless of type"""

assert is_pjs_instance(vo)
assert is_pydantic_instance(vo)
vo_type = vo.type._value

if vo_type in handlers:
Expand Down
4 changes: 2 additions & 2 deletions tests/test_vrs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from ga4gh.core import sha512t24u, ga4gh_digest, ga4gh_serialize, ga4gh_identify, is_pjs_instance
from ga4gh.core import sha512t24u, ga4gh_digest, ga4gh_serialize, ga4gh_identify, is_pydantic_instance
from ga4gh.vrs import models, vrs_deref, vrs_enref

allele_dict = {
Expand All @@ -22,7 +22,7 @@ def test_vr():

assert a.as_dict() == allele_dict

assert is_pjs_instance(a.location)
assert is_pydantic_instance(a.location)

assert ga4gh_serialize(
a.location
Expand Down

0 comments on commit b6b5888

Please sign in to comment.