Skip to content

Commit

Permalink
updated Terminus/DerivativeMolecule changes from vrs 2.x and fixed tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
larrybabb committed Aug 12, 2024
1 parent 4da6776 commit 0818b06
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 36 deletions.
73 changes: 48 additions & 25 deletions src/ga4gh/vrs/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,9 @@ class VrsType(str, Enum):
ALLELE = "Allele"
CIS_PHASED_BLOCK = "CisPhasedBlock"
ADJACENCY = "Adjacency"
SEQ_TERMINUS = "SequenceTerminus"
DERIVATIVE_SEQ = "DerivativeSequence"
TERMINUS = "Terminus"
TRAVERSAL_BLOCK = "TraversalBlock"
DERIVATIVE_MOL = "DerivativeMolecule"
CN_COUNT = "CopyNumberCount"
CN_CHANGE = "CopyNumberChange"

Expand All @@ -163,14 +164,14 @@ class ResidueAlphabet(str, Enum):
class CopyChange(str, Enum):
"""Define constraints for copy change"""

EFO_0030069 = 'efo:0030069'
EFO_0020073 = 'efo:0020073'
EFO_0030068 = 'efo:0030068'
EFO_0030067 = 'efo:0030067'
EFO_0030064 = 'efo:0030064'
EFO_0030070 = 'efo:0030070'
EFO_0030071 = 'efo:0030071'
EFO_0030072 = 'efo:0030072'
EFO_0030069 = 'EFO:0030069'
EFO_0020073 = 'EFO:0020073'
EFO_0030068 = 'EFO:0030068'
EFO_0030067 = 'EFO:0030067'
EFO_0030064 = 'EFO:0030064'
EFO_0030070 = 'EFO:0030070'
EFO_0030071 = 'EFO:0030071'
EFO_0030072 = 'EFO:0030072'


def _recurse_ga4gh_serialize(obj):
Expand Down Expand Up @@ -419,7 +420,6 @@ class ga4gh(_ValueObject.ga4gh):
'type'
]


#########################################
# vrs location
#########################################
Expand Down Expand Up @@ -626,37 +626,60 @@ class ga4gh(_Ga4ghIdentifiableObject.ga4gh):
]


class SequenceTerminus(_VariationBase):
"""The `SequenceTerminus` data class provides a structure for describing the end
class Terminus(_VariationBase):
"""The `Terminus` data class provides a structure for describing the end
(terminus) of a sequence. Structurally similar to Adjacency but the linker sequence
is not allowed and it removes the unnecessary array structure.
"""

type: Literal["SequenceTerminus"] = Field(VrsType.SEQ_TERMINUS.value, description=f'MUST be "{VrsType.SEQ_TERMINUS.value}"')
type: Literal["Terminus"] = Field(VrsType.TERMINUS.value, description=f'MUST be "{VrsType.TERMINUS.value}"')
location: Union[IRI, SequenceLocation] = Field(..., description="The location of the terminus.")

class ga4gh(_Ga4ghIdentifiableObject.ga4gh):
prefix = "SQX"
prefix = "TM"
keys = [
"location",
"type"
]

class TraversalBlock(_ValueObject):
    """Pair one molecular-variation component with its traversal orientation.

    Instances are the element type of ``DerivativeMolecule.components``.
    """

    # Discriminator; defaults to the TRAVERSAL_BLOCK member of VrsType.
    type: Literal["TraversalBlock"] = Field(
        VrsType.TRAVERSAL_BLOCK.value,
        description=f'MUST be "{VrsType.TRAVERSAL_BLOCK.value}"',
    )

    # Required: which way the component is read.
    orientation: Literal["forward", "reverse_complement"] = Field(
        ..., description='The orientation of the traversal block, either forward or reverse_complement.'
    )

    # Required: the variation being traversed, given inline or as an IRI.
    # NOTE(review): the description has a grammar slip ("make" -> "makes");
    # it is left untouched here because it is a runtime string.
    component: Union[IRI, Adjacency, Allele, Terminus, CisPhasedBlock] = Field(
        ..., description="The component that make up the derivative molecule."
    )

    class ga4gh(_ValueObject.ga4gh):
        # Attribute names in alphabetical order, like the other ga4gh key
        # lists in this file; presumably the fields used when serializing
        # for digest computation -- confirm against _ValueObject.ga4gh.
        keys = ['component', 'orientation', 'type']

class DerivativeSequence(_VariationBase):
"""The "Derivative Sequence" data class is a structure for describing a derivate
sequence composed from multiple sequence adjacencies.
class DerivativeMolecule(_VariationBase):
"""The "Derivative Molecule" data class is a structure for describing a derivative
molecule composed from multiple sequence components.
"""

type: Literal["DerivativeSequence"] = Field(VrsType.DERIVATIVE_SEQ.value, description=f'MUST be "{VrsType.DERIVATIVE_SEQ.value}"')
components: List[Union[IRI, Adjacency, Allele, SequenceTerminus, CisPhasedBlock]] = Field(
type: Literal["DerivativeMolecule"] = Field(VrsType.DERIVATIVE_MOL.value, description=f'MUST be "{VrsType.DERIVATIVE_MOL.value}"')
components: List[TraversalBlock] = Field(
...,
description="The sequence components that make up the derivative sequence.",
description="The traversal block components that make up the derivative molecule.",
min_length=2
)

class ga4gh(_Ga4ghIdentifiableObject.ga4gh):
prefix = "DSQ"
prefix = "DM"
keys = [
"components",
"type"
Expand Down Expand Up @@ -706,7 +729,7 @@ class CopyNumberChange(_CopyNumber):
type: Literal["CopyNumberChange"] = Field(VrsType.CN_CHANGE.value, description=f'MUST be "{VrsType.CN_CHANGE.value}"')
copyChange: CopyChange = Field(
...,
description='MUST be one of "efo:0030069" (complete genomic loss), "efo:0020073" (high-level loss), "efo:0030068" (low-level loss), "efo:0030067" (loss), "efo:0030064" (regional base ploidy), "efo:0030070" (gain), "efo:0030071" (low-level gain), "efo:0030072" (high-level gain).',
description='MUST be one of "EFO:0030069" (complete genomic loss), "EFO:0020073" (high-level loss), "EFO:0030068" (low-level loss), "EFO:0030067" (loss), "EFO:0030064" (regional base ploidy), "EFO:0030070" (gain), "EFO:0030071" (low-level gain), "EFO:0030072" (high-level gain).',
)

class ga4gh(_Ga4ghIdentifiableObject.ga4gh):
Expand All @@ -726,7 +749,7 @@ class ga4gh(_Ga4ghIdentifiableObject.ga4gh):
class MolecularVariation(RootModel):
"""A `variation` on a contiguous molecule."""

root: Union[Allele, CisPhasedBlock, Adjacency, SequenceTerminus, DerivativeSequence] = Field(
root: Union[Allele, CisPhasedBlock, Adjacency, Terminus, DerivativeMolecule] = Field(
...,
json_schema_extra={
'description': 'A `variation` on a contiguous molecule.'
Expand Down Expand Up @@ -759,7 +782,7 @@ class Location(RootModel):
class Variation(RootModel):
"""A representation of the state of one or more biomolecules."""

root: Union[Allele, CisPhasedBlock, Adjacency, SequenceTerminus, DerivativeSequence, CopyNumberChange, CopyNumberCount] = Field(
root: Union[Allele, CisPhasedBlock, Adjacency, Terminus, DerivativeMolecule, CopyNumberChange, CopyNumberCount] = Field(
...,
json_schema_extra={
'description': 'A representation of the state of one or more biomolecules.'
Expand Down
16 changes: 8 additions & 8 deletions tests/extras/test_cnv_translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ def tlr(rest_dataproxy):
from_hgvs_cx_tests = (
("NC_000013.11:g.26440969_26443305del", models.CopyChange.EFO_0030069,
{'copyChange': 'EFO:0030069',
'digest': 'TvAhuGK6HYf53mXoUnon60cZ7DC_UgM3',
'id': 'ga4gh:CX.TvAhuGK6HYf53mXoUnon60cZ7DC_UgM3',
'digest': 'bY_40M893gdWmNhU598V-T_dYtPJs-pp',
'id': 'ga4gh:CX.bY_40M893gdWmNhU598V-T_dYtPJs-pp',
'location': {'digest': '4akcjXlbAu4xBKnxjOL_b4DM_20HOCA3',
'end': 26443305,
'id': 'ga4gh:SL.4akcjXlbAu4xBKnxjOL_b4DM_20HOCA3',
Expand All @@ -29,8 +29,8 @@ def tlr(rest_dataproxy):
'type': 'CopyNumberChange'}),
("NC_000013.11:g.32379315_32379819del", None,
{'copyChange': 'EFO:0030067',
'digest': 'K1J2muiVvrDWqKnd5cMpFbTP0eJUfeuE',
'id': 'ga4gh:CX.K1J2muiVvrDWqKnd5cMpFbTP0eJUfeuE',
'digest': 'WKtHlbV6XCoKqvyeAJxdFj4ogw9ipDfQ',
'id': 'ga4gh:CX.WKtHlbV6XCoKqvyeAJxdFj4ogw9ipDfQ',
'location': {'digest': '_TUGA9kX6JKdXzUklgN2zWkOvNu5pNmV',
'end': 32379819,
'id': 'ga4gh:SL._TUGA9kX6JKdXzUklgN2zWkOvNu5pNmV',
Expand All @@ -42,8 +42,8 @@ def tlr(rest_dataproxy):
),
("NC_000013.11:g.32332787_32333388dup", models.CopyChange.EFO_0030071,
{'copyChange': 'EFO:0030071',
'digest': '5gODsVN83N1fe9Lc_Octy5rBlkYl8pGU',
'id': 'ga4gh:CX.5gODsVN83N1fe9Lc_Octy5rBlkYl8pGU',
'digest': 'EqI18-X9p8MUDr-Oz2J5GPQppEQKqWMU',
'id': 'ga4gh:CX.EqI18-X9p8MUDr-Oz2J5GPQppEQKqWMU',
'location': {'digest': 'UOA3zJOPfQxxRord_7pBkoMBpt46xcQq',
'end': 32333388,
'id': 'ga4gh:SL.UOA3zJOPfQxxRord_7pBkoMBpt46xcQq',
Expand All @@ -55,8 +55,8 @@ def tlr(rest_dataproxy):
),
("NC_000013.11:g.32344743_32352093dup", None,
{'copyChange': 'EFO:0030070',
'digest': '-sUe85R9UC_RxX7e_B1YsmsdyLsGvvmq',
'id': 'ga4gh:CX.-sUe85R9UC_RxX7e_B1YsmsdyLsGvvmq',
'digest': 'eZVvwCSineeWTGKG3vbJvqkSUMW2JTCH',
'id': 'ga4gh:CX.eZVvwCSineeWTGKG3vbJvqkSUMW2JTCH',
'location': {'digest': '17-a6p7m6QznwxZyVz2QdA9oPf5jTCyT',
'end': 32352093,
'id': 'ga4gh:SL.17-a6p7m6QznwxZyVz2QdA9oPf5jTCyT',
Expand Down
4 changes: 2 additions & 2 deletions tests/test_vrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,8 +265,8 @@ def test_class_refatt_map():
'CopyNumberCount': ['location'],
'CopyNumberChange': ['location'],
'Adjacency': ['adjoinedSequences'],
'DerivativeSequence': ['components'],
'SequenceTerminus': ['location']
'TraversalBlock': ['component'],
'Terminus': ['location']
}
assert class_refatt_map_expected == models.class_refatt_map

Expand Down

0 comments on commit 0818b06

Please sign in to comment.