Skip to content

Commit

Permalink
progress: need to somehow get ID though
Browse files Browse the repository at this point in the history
  • Loading branch information
jsstevenson committed Nov 4, 2024
1 parent b8f27fa commit 06ec7f2
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 13 deletions.
6 changes: 5 additions & 1 deletion src/ga4gh/core/identifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,11 @@ def is_ga4gh_identifier(ir):
return str(get_pydantic_root(ir)).startswith(NS_W_SEP)


def ga4gh_identify(vro, in_place: str = 'default', as_version: PrevVrsVersion | None = None) -> str | None:
def ga4gh_identify(
vro,
in_place: str = 'default',
as_version: PrevVrsVersion | None = None
) -> str | None:
"""Return the GA4GH digest-based id for the object, as a CURIE
(string). Returns None if object is not identifiable.
Expand Down
23 changes: 11 additions & 12 deletions src/ga4gh/vrs/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,17 +197,17 @@ class Syntax(str, Enum):
SPDI = "spdi"


def _recurse_ga4gh_serialize(obj):
def _recurse_ga4gh_serialize(obj, store_digest: bool = True):
if isinstance(obj, _Ga4ghIdentifiableObject):
return obj.get_or_create_digest()
return obj.get_or_create_digest(store=store_digest)
elif isinstance(obj, _ValueObject):
return obj.ga4gh_serialize()
elif isinstance(obj, RootModel):
return _recurse_ga4gh_serialize(obj.model_dump())
return _recurse_ga4gh_serialize(obj.model_dump(), store_digest)
elif isinstance(obj, str):
return obj
elif isinstance(obj, list):
return [_recurse_ga4gh_serialize(x) for x in obj]
return [_recurse_ga4gh_serialize(x, store_digest) for x in obj]
else:
return obj

Expand All @@ -220,11 +220,11 @@ class _ValueObject(Entity, ABC):
def __hash__(self):
return encode_canonical_json(self.ga4gh_serialize()).decode("utf-8").__hash__()

def ga4gh_serialize(self) -> Dict:
def ga4gh_serialize(self, store_digest: bool = True) -> Dict:
out = OrderedDict()
for k in self.ga4gh.keys:
v = getattr(self, k)
out[k] = _recurse_ga4gh_serialize(v)
out[k] = _recurse_ga4gh_serialize(v, store_digest=store_digest)
return out

class ga4gh:
Expand Down Expand Up @@ -266,7 +266,7 @@ def compute_digest(self, store: bool = True, as_version: PrevVrsVersion | None =
returned following the conventions of the VRS version indicated by ``as_version``.
"""
if as_version is None:
digest = sha512t24u(encode_canonical_json(self.ga4gh_serialize()))
digest = sha512t24u(encode_canonical_json(self.ga4gh_serialize(store_digest=store)))
if store:
self.digest = digest
else:
Expand All @@ -281,7 +281,6 @@ def get_or_create_ga4gh_identifier(
in_place: str = 'default',
recompute: bool = False,
as_version: PrevVrsVersion | None = None,
store_digest: bool = True,
) -> str:
"""Sets and returns a GA4GH Computed Identifier for the object.
Overwrites the existing identifier if overwrite is True.
Expand All @@ -301,8 +300,8 @@ def get_or_create_ga4gh_identifier(
:param recompute:
:param as_version: If provided, other parameters are ignored and a computed
identifier is returned following the conventions of the given VRS version.
:param store_digest: if ``False``, don't set the object's ``digest`` field.
"""
store_digest = in_place != 'never'
if as_version is not None:
return self.compute_ga4gh_identifier(as_version=as_version)

Expand Down Expand Up @@ -338,7 +337,7 @@ def compute_ga4gh_identifier(
self.get_or_create_digest(recompute, store=store_digest)
return f'{CURIE_NAMESPACE}{CURIE_SEP}{self.ga4gh.prefix}{GA4GH_PREFIX_SEP}{self.digest}'
else:
digest = self.compute_digest(as_version=as_version)
digest = self.compute_digest(store=store_digest, as_version=as_version)
return f'{CURIE_NAMESPACE}{CURIE_SEP}{self.ga4gh.priorPrefix[as_version]}{GA4GH_PREFIX_SEP}{digest}'

def get_or_create_digest(self, recompute: bool = False, store: bool = True) -> str:
Expand Down Expand Up @@ -674,8 +673,8 @@ class CisPhasedBlock(_VariationBase):
)
sequenceReference: Optional[SequenceReference] = Field(None, description="An optional Sequence Reference on which all of the in-cis Alleles are found. When defined, this may be used to implicitly define the `sequenceReference` attribute for each of the CisPhasedBlock member Alleles.")

def ga4gh_serialize(self) -> Dict:
out = _ValueObject.ga4gh_serialize(self)
def ga4gh_serialize(self, store_digest: bool = True) -> Dict:
out = _ValueObject.ga4gh_serialize(self, store_digest=store_digest)
out["members"] = sorted(out["members"])
return out

Expand Down
12 changes: 12 additions & 0 deletions tests/test_vrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,8 @@ def test_compute_identifiers_when():
# when id property is missing
vo_a = models.Allele(**a)
assert ga4gh_identify(vo_a, in_place='never') == correct_id
assert vo_a.digest is None
assert vo_a.location.digest is None
with use_ga4gh_compute_identifier_when(VrsObjectIdentifierIs.ANY):
assert ga4gh_identify(vo_a, in_place='never') == correct_id
with use_ga4gh_compute_identifier_when(VrsObjectIdentifierIs.GA4GH_INVALID):
Expand All @@ -319,6 +321,8 @@ def test_compute_identifiers_when():
a["id"] = None
vo_a = models.Allele(**a)
assert ga4gh_identify(vo_a, in_place='never') == correct_id
assert vo_a.digest is None
assert vo_a.location.digest is None
with use_ga4gh_compute_identifier_when(VrsObjectIdentifierIs.ANY):
assert ga4gh_identify(vo_a, in_place='never') == correct_id
with use_ga4gh_compute_identifier_when(VrsObjectIdentifierIs.GA4GH_INVALID):
Expand All @@ -330,6 +334,8 @@ def test_compute_identifiers_when():
a["id"] = ""
vo_a = models.Allele(**a)
assert ga4gh_identify(vo_a, in_place='never') == correct_id
assert vo_a.digest is None
assert vo_a.location.digest is None
with use_ga4gh_compute_identifier_when(VrsObjectIdentifierIs.ANY):
assert ga4gh_identify(vo_a, in_place='never') == correct_id
with use_ga4gh_compute_identifier_when(VrsObjectIdentifierIs.GA4GH_INVALID):
Expand All @@ -341,6 +347,8 @@ def test_compute_identifiers_when():
a["id"] = syntax_invalid_id
vo_a = models.Allele(**a)
assert ga4gh_identify(vo_a, in_place='never') == correct_id
assert vo_a.digest is None
assert vo_a.location.digest is None
with use_ga4gh_compute_identifier_when(VrsObjectIdentifierIs.ANY):
assert ga4gh_identify(vo_a, in_place='never') == correct_id
with use_ga4gh_compute_identifier_when(VrsObjectIdentifierIs.GA4GH_INVALID):
Expand All @@ -352,6 +360,8 @@ def test_compute_identifiers_when():
a["id"] = syntax_valid_id
vo_a = models.Allele(**a)
assert ga4gh_identify(vo_a, in_place='never') == correct_id
assert vo_a.digest is None
assert vo_a.location.digest is None
with use_ga4gh_compute_identifier_when(VrsObjectIdentifierIs.ANY):
assert ga4gh_identify(vo_a, in_place='never') == correct_id
with use_ga4gh_compute_identifier_when(VrsObjectIdentifierIs.GA4GH_INVALID):
Expand All @@ -364,6 +374,8 @@ def test_compute_identifiers_when():
vo_a = models.Allele(**a)
assert ga4gh_identify(vo_a, in_place='never') == correct_id
assert ga4gh_identify(vo_a, in_place='never') is not correct_id
assert vo_a.digest is None
assert vo_a.location.digest is None
with use_ga4gh_compute_identifier_when(VrsObjectIdentifierIs.ANY):
assert ga4gh_identify(vo_a, in_place='never') == correct_id
assert ga4gh_identify(vo_a, in_place='never') is not correct_id
Expand Down

0 comments on commit 06ec7f2

Please sign in to comment.