Skip to content

Commit

Permalink
feat!: update vrs 2.0-alpha models (#257)
Browse files Browse the repository at this point in the history
* Made use of the classes in core-source/vrs-source so we don't repeat code
  * To use core models: `from ga4gh.core import core_models` 
* Changed enum keys to uppercase
* Update tests since digests changed
  • Loading branch information
korikuzma committed Sep 12, 2023
1 parent 4e83a9c commit f8ed3b1
Show file tree
Hide file tree
Showing 11 changed files with 505 additions and 400 deletions.
1 change: 1 addition & 0 deletions src/ga4gh/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from ._internal.pydantic import (
is_pydantic_instance, is_curie_type, is_identifiable, is_literal, pydantic_copy
)
from ._internal import models as core_models

try:
__version__ = version(__name__)
Expand Down
25 changes: 20 additions & 5 deletions src/ga4gh/core/_internal/identifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

import logging
import re
from typing import Union, Tuple
from typing import Union, Optional
from pydantic import BaseModel, RootModel
from canonicaljson import encode_canonical_json

Expand Down Expand Up @@ -101,6 +101,12 @@ def ga4gh_identify(vro):
return None


def _is_sequence_reference(input_obj) -> bool:
    """Determine if `input_obj` is a Sequence Reference.

    :param input_obj: Object to inspect. Its `type` attribute is only read
        when the `ga4gh.assigned.default` lookup is truthy.
    :return: `True` when `getattr_in(input_obj, ["ga4gh", "assigned", "default"])`
        is truthy and `input_obj.type` equals `"SequenceReference"`;
        otherwise `False`.
    """
    # `and` hands back its first falsy operand unchanged (e.g. `None`), so
    # coerce explicitly to honour the declared `bool` return type.
    return bool(
        getattr_in(input_obj, ["ga4gh", "assigned", "default"])
        and input_obj.type == "SequenceReference"
    )


def ga4gh_digest(vro: BaseModel, do_compact=True):
"""
Return the GA4GH digest for the object.
Expand All @@ -116,8 +122,12 @@ def ga4gh_digest(vro: BaseModel, do_compact=True):
'u5fspwVbQ79QkX6GHLF8tXPCAXFJqRPx'
"""
s = ga4gh_serialize(vro)
return sha512t24u(s)
if _is_sequence_reference(vro):
digest = vro.refgetAccession.split("SQ.")[-1]
else:
s = ga4gh_serialize(vro)
digest = sha512t24u(s)
return digest


def replace_with_digest(val: dict) -> Union[str, dict]:
Expand All @@ -144,11 +154,14 @@ def collapse_identifiable_values(obj: dict) -> dict:
return obj


def ga4gh_serialize(obj: BaseModel) -> bytes:
def ga4gh_serialize(obj: BaseModel) -> Optional[bytes]:
"""
TODO find a way to output identify_all without the 'digest' fields on subobjects,
without traversing the whole tree again in collapse_identifiable_values.
"""
if _is_sequence_reference(obj):
return None

identified = identify_all(obj)
if isinstance(identified, dict):
# Replace identifiable subobjects with their digests
Expand Down Expand Up @@ -185,7 +198,7 @@ def identify_all(
) -> Union[str, dict]:
"""
Adds digests to an identifiable Pydantic object and any identifiable Pydantic
objects in its fields, at any depth.
objects in its fields, at any depth. Assumes IRIs are dereferenced.
Returns the identified object tree, and the tree with identified objects
replaced with their digests.
Expand All @@ -206,6 +219,8 @@ def identify_all(
exported_obj = export_pydantic_model(input_obj)
if "digest" in exported_obj and exported_obj["digest"] is not None:
output_obj = exported_obj
elif _is_sequence_reference(input_obj):
output_obj = exported_obj["refgetAccession"].split("SQ.")[-1]
else:
# Take static key set from the object, or use all fields
include_keys = getattr_in(input_obj, ["ga4gh", "keys"])
Expand Down
253 changes: 253 additions & 0 deletions src/ga4gh/core/_internal/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
"""
**This module should not be imported directly.**
Instead, users should use one of the following:
* `from ga4gh.core import core_models`, and refer to models with the
abbreviated name, e.g., `core_models.Gene` (recommended)
* `import ga4gh.core`, and refer to models using the fully-qualified
module name, e.g., `ga4gh.core.core_models.Gene`
"""
from typing import Any, Dict, List, Literal, Optional, Union
from enum import Enum

from pydantic import BaseModel, ConfigDict, Field, RootModel, constr


class Relation(Enum):
    """A mapping relation between concepts as defined by the Simple Knowledge
    Organization System (SKOS).
    """

    # Member names are upper snake case; each value is the camelCase term.
    CLOSE_MATCH = "closeMatch"
    EXACT_MATCH = "exactMatch"
    BROAD_MATCH = "broadMatch"
    NARROW_MATCH = "narrowMatch"
    RELATED_MATCH = "relatedMatch"


class Code(RootModel):
    """Indicates that the value is taken from a set of controlled strings defined
    elsewhere. Technically, a code is restricted to a string which has at least one
    character and no leading or trailing whitespace, and where there is no whitespace
    other than single spaces in the contents."""

    # NOTE(review): pydantic v2 appears to apply `pattern` as an unanchored
    # search, so this pattern alone may not reject leading/trailing
    # whitespace — confirm whether explicit `^...$` anchors are wanted.
    root: constr(pattern=r'\S+( \S+)*') = Field(
        ...,
        description='Indicates that the value is taken from a set of controlled strings defined elsewhere. Technically, a code is restricted to a string which has at least one character and no leading or trailing whitespace, and where there is no whitespace other than single spaces in the contents.',
        # Passing `example=` directly is an "extra keyword argument" to
        # `Field`, which pydantic v2 deprecates; `json_schema_extra` emits the
        # same `example` key in the JSON schema without the warning.
        json_schema_extra={'example': 'ENSG00000139618'},
    )


class IRI(RootModel):
    # Root model wrapping a single required string; the field description
    # below carries the documentation that ends up in the schema.
    root: str = Field(
        default=...,
        description='An IRI Reference (either an IRI or a relative-reference), according to `RFC3986 section 4.1 <https://datatracker.ietf.org/doc/html/rfc3986#section-4.1>` and `RFC3987 section 2.1 <https://datatracker.ietf.org/doc/html/rfc3987#section-2.1>`. MAY be a JSON Pointer as an IRI fragment, as described by `RFC6901 section 6 <https://datatracker.ietf.org/doc/html/rfc6901#section-6>`.',
    )


class Extension(BaseModel):
    """The Extension class provides VODs with a means to extend descriptions with other
    attributes unique to a content provider. These extensions are not expected to be
    natively understood under VRSATILE, but may be used for pre-negotiated exchange of
    message attributes when needed.
    """

    # Fields not declared below are accepted rather than rejected.
    model_config = ConfigDict(extra='allow')

    # Fixed tag; defaults to the only permitted literal.
    type: Literal['Extension'] = Field(
        default='Extension',
        description='MUST be "Extension".',
    )
    # Required.
    name: str = Field(default=..., description='A name for the Extension')
    # Optional payload; defaults to None.
    value: Optional[Union[float, str, bool, Dict[str, Any], List[Any]]] = Field(
        default=None,
        description='Any primitive or structured object',
    )


class _Entity(BaseModel):
    """Entity is the root class of `core` classes model - those that have identifiers
    and other general metadata like labels, xrefs, urls, descriptions, etc. All core
    classes descend from and inherit its attributes.
    """

    # Every attribute here is optional and defaults to None.
    id: Optional[str] = Field(
        default=None,
        description="The 'logical' identifier of the entity in the system of record, e.g. a UUID. This 'id' is unique within a given system. The identified entity may have a different 'id' in a different system, or may refer to an 'id' for the shared concept in another system (e.g. a CURIE).",
    )
    label: Optional[str] = Field(
        default=None, description='A primary label for the entity.'
    )
    description: Optional[str] = Field(
        default=None, description='A free-text description of the entity.'
    )
    extensions: Optional[List[Extension]] = None



class Coding(BaseModel):
    """a concept codified by a terminology system."""

    # Optional; defaults to None.
    label: Optional[str] = Field(
        default=None, description='A primary label for the coding.'
    )
    # Required.
    system: str = Field(
        default=..., description='Identity of the terminology system.'
    )
    # Optional; defaults to None.
    version: Optional[str] = Field(
        default=None, description='Version of the terminology system.'
    )
    # Required; validated through the `Code` root model.
    code: Code = Field(
        default=...,
        description='Symbol in syntax defined by the terminology system.',
    )


class Mapping(_Entity):
    """A mapping to a concept in a terminology system."""

    # Per pydantic's `use_enum_values`, enum fields are populated with the
    # member's value rather than the Enum instance.
    model_config = ConfigDict(use_enum_values=True)

    # Required.
    coding: Coding
    # Required.
    relation: Relation = Field(
        default=...,
        description='A mapping relation between concepts as defined by the Simple Knowledge Organization System (SKOS).',
    )


class _MappableEntity(_Entity):
    """an Entity that is mappable to codings in other terminology systems."""

    # Optional list of `Mapping` objects; defaults to None.
    mappings: Optional[List[Mapping]] = None


class _DomainEntity(_MappableEntity):
    """An Entity that is specific to a particular biomedical domain such as disease,
    therapeutics, or genes.
    """

    # Required here; the subclasses in this module narrow it to a Literal
    # with a default.
    type: str
    # Optional; defaults to None.
    aliases: Optional[List[str]] = Field(
        default=None,
        description='Aliases are alternate labels for a Domain Entity.',
    )


class Phenotype(_DomainEntity):
    """An observable characteristic or trait of an organism."""

    # Fixed tag; defaults to the only permitted literal.
    type: Literal['Phenotype'] = Field(
        default='Phenotype', description='MUST be "Phenotype".'
    )


class Disease(_DomainEntity):
    """A particular abnormal condition that negatively affects the structure or function
    of all or part of an organism and is not immediately due to any external injury.
    """

    # Fixed tag; defaults to the only permitted literal.
    type: Literal['Disease'] = Field(
        default='Disease', description='MUST be "Disease".'
    )


class TraitSet(_DomainEntity):
    """A set of phenotype and/or disease concepts that together constitute a condition."""

    # Fixed tag; defaults to the only permitted literal.
    type: Literal['TraitSet'] = Field(
        default='TraitSet', description='MUST be "TraitSet".'
    )
    # Required; constrained with min_length=2.
    traits: List[Union[Disease, Phenotype]] = Field(default=..., min_length=2)


class TherapeuticAction(_DomainEntity):
    """A therapeutic action taken that is intended to alter or stop a pathologic process."""

    # Fixed tag; defaults to the only permitted literal.
    type: Literal['TherapeuticAction'] = Field(
        default='TherapeuticAction', description='MUST be "TherapeuticAction".'
    )


class TherapeuticAgent(_DomainEntity):
    """An administered therapeutic agent that is intended to alter or stop a pathologic process."""

    # Fixed tag; defaults to the only permitted literal.
    type: Literal['TherapeuticAgent'] = Field(
        default='TherapeuticAgent', description='MUST be "TherapeuticAgent".'
    )


class TherapeuticSubstituteGroup(_DomainEntity):
    """A group of therapeutic procedures that may be treated as substitutes for one another."""

    # Fixed tag; defaults to the only permitted literal.
    type: Literal['TherapeuticSubstituteGroup'] = Field(
        default='TherapeuticSubstituteGroup',
        description='MUST be "TherapeuticSubstituteGroup".',
    )
    # Required; constrained with min_length=2.
    substitutes: List[Union[TherapeuticAction, TherapeuticAgent]] = Field(
        default=...,
        description='The individual therapeutic procedures that may be treated as substitutes.',
        min_length=2,
    )


class CombinationTherapy(_DomainEntity):
    """A therapeutic procedure that involves multiple different therapeutic procedures
    performed in combination.
    """

    # Fixed tag; defaults to the only permitted literal.
    type: Literal['CombinationTherapy'] = Field(
        default='CombinationTherapy',
        description='MUST be "CombinationTherapy".',
    )
    # Required; constrained with min_length=2. Union member order is kept
    # exactly as declared originally.
    components: List[
        Union[TherapeuticSubstituteGroup, TherapeuticAction, TherapeuticAgent]
    ] = Field(
        default=...,
        description='The individual therapeutic procedure components that constitute the combination therapy.',
        min_length=2,
    )


class Condition(RootModel):
    """A disease or other medical disorder."""

    # The wrapped value is one of the listed models; `type` is the
    # discriminator used to select which one.
    root: Union[Disease, Phenotype, TraitSet] = Field(
        default=...,
        description='A disease or other medical disorder.',
        discriminator='type',
    )


class TherapeuticProcedure(RootModel):
    """An action or administration of therapeutic agents to produce an effect that is
    intended to alter or stop a pathologic process.
    """

    # The wrapped value is one of the listed models; `type` is the
    # discriminator used to select which one.
    root: Union[
        CombinationTherapy,
        TherapeuticAction,
        TherapeuticAgent,
        TherapeuticSubstituteGroup,
    ] = Field(
        default=...,
        description='An action or administration of therapeutic agents to produce an effect that is intended to alter or stop a pathologic process.',
        discriminator='type',
    )


class Gene(_DomainEntity):
    """A basic physical and functional unit of heredity."""

    # Fixed tag; defaults to the only permitted literal.
    type: Literal['Gene'] = Field(default='Gene', description='MUST be "Gene".')
Loading

0 comments on commit f8ed3b1

Please sign in to comment.