Skip to content

Commit

Permalink
feat!: parameterize normalize in translator (#361)
Browse files Browse the repository at this point in the history
* feat!: parameterize normalize in translator

* new kwarg (`do_normalize`) in translate_from methods; set it to control
  whether normalization is performed

* add test from spdi cassette
  • Loading branch information
korikuzma authored Mar 13, 2024
1 parent 39fa484 commit cef704d
Show file tree
Hide file tree
Showing 15 changed files with 464 additions and 381 deletions.
24 changes: 17 additions & 7 deletions src/ga4gh/vrs/extras/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,12 @@ def __init__(
self,
data_proxy,
default_assembly_name="GRCh38",
normalize=True,
identify=True,
rle_seq_limit: Optional[int] = 50
):
self.default_assembly_name = default_assembly_name
self.data_proxy = data_proxy
self.identify = identify
self.normalize = normalize
self.rle_seq_limit = rle_seq_limit
self.hgvs_tools = None
self.from_translators = {}
Expand Down Expand Up @@ -127,6 +125,8 @@ def translate_from(self, var, fmt=None, **kwargs):
To exclude `sequence` from the response, set to 0.
For no limit, set to `None`.
Default value is set in the instance variable `rle_seq_limit`.
do_normalize (bool): `True` if fully justified normalization should be
performed. `False` otherwise. Defaults to `True`
"""
if fmt:
try:
Expand Down Expand Up @@ -225,10 +225,10 @@ class AlleleTranslator(Translator):
"""Class for translating formats to and from VRS Alleles"""

def __init__(
self, data_proxy, default_assembly_name="GRCh38", normalize=True, identify=True
self, data_proxy, default_assembly_name="GRCh38", identify=True
):
"""Initialize AlleleTranslator class"""
super().__init__(data_proxy, default_assembly_name, normalize, identify)
super().__init__(data_proxy, default_assembly_name, identify)

self.from_translators = {
"beacon": self._from_beacon,
Expand All @@ -253,6 +253,8 @@ def _from_beacon(self, beacon_expr, **kwargs):
To exclude `sequence` from the response, set to 0.
For no limit, set to `None`.
Default value is set in the instance variable `rle_seq_limit`.
do_normalize (bool): `True` if fully justified normalization should be
performed. `False` otherwise. Defaults to `True`
#>>> a = tlr.from_beacon("19 : 44908822 C > T")
#>>> a.model_dump()
Expand Down Expand Up @@ -316,6 +318,8 @@ def _from_gnomad(self, gnomad_expr, **kwargs):
To exclude `sequence` from the response, set to 0.
For no limit, set to `None`.
Default value is set in the instance variable `rle_seq_limit`.
do_normalize (bool): `True` if fully justified normalization should be
performed. `False` otherwise. Defaults to `True`
#>>> a = tlr.from_gnomad("1-55516888-G-GA")
#>>> a.model_dump()
Expand Down Expand Up @@ -378,6 +382,8 @@ def _from_hgvs(self, hgvs_expr: str, **kwargs):
To exclude `sequence` from the response, set to 0.
For no limit, set to `None`.
Default value is set in the instance variable `rle_seq_limit`.
do_normalize (bool): `True` if fully justified normalization should be
performed. `False` otherwise. Defaults to `True`
#>>> a = tlr.from_hgvs("NC_000007.14:g.55181320A>T")
#>>> a.model_dump()
Expand Down Expand Up @@ -452,6 +458,8 @@ def _from_spdi(self, spdi_expr, **kwargs):
To exclude `sequence` from the response, set to 0.
For no limit, set to `None`.
Default value is set in the instance variable `rle_seq_limit`.
do_normalize (bool): `True` if fully justified normalization should be
performed. `False` otherwise. Defaults to `True`
#>>> a = tlr.from_spdi("NC_000013.11:32936731:1:C")
#>>> a.model_dump()
Expand Down Expand Up @@ -626,8 +634,10 @@ def _post_process_imported_allele(self, allele, **kwargs):
normalization, this sets the limit for the length of the `sequence`.
To exclude `sequence` from the response, set to 0.
For no limit, set to `None`.
do_normalize (bool): `True` if fully justified normalization should be
performed. `False` otherwise. Defaults to `True`
"""
if self.normalize:
if kwargs.get("do_normalize", True):
allele = normalize(
allele,
self.data_proxy,
Expand All @@ -654,10 +664,10 @@ class CnvTranslator(Translator):
"""Class for translating from other formats to VRS Copy Number"""

def __init__(
self, data_proxy, default_assembly_name="GRCh38", normalize=True, identify=True
self, data_proxy, default_assembly_name="GRCh38", identify=True
):
"""Initialize CnvTranslator class"""
super().__init__(data_proxy, default_assembly_name, normalize, identify)
super().__init__(data_proxy, default_assembly_name, identify)
self.from_translators = {
"hgvs": self._from_hgvs,
}
Expand Down
86 changes: 86 additions & 0 deletions tests/extras/cassettes/test_from_spdi.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
interactions:
- request:
body: null
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
User-Agent:
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/metadata/refseq:NC_000019.10
response:
body:
string: "{\n \"added\": \"2016-08-24T08:19:02Z\",\n \"aliases\": [\n \"Ensembl:19\",\n
\ \"ensembl:19\",\n \"GRCh38:19\",\n \"GRCh38:chr19\",\n \"GRCh38.p1:19\",\n
\ \"GRCh38.p1:chr19\",\n \"GRCh38.p10:19\",\n \"GRCh38.p10:chr19\",\n
\ \"GRCh38.p11:19\",\n \"GRCh38.p11:chr19\",\n \"GRCh38.p12:19\",\n
\ \"GRCh38.p12:chr19\",\n \"GRCh38.p2:19\",\n \"GRCh38.p2:chr19\",\n
\ \"GRCh38.p3:19\",\n \"GRCh38.p3:chr19\",\n \"GRCh38.p4:19\",\n \"GRCh38.p4:chr19\",\n
\ \"GRCh38.p5:19\",\n \"GRCh38.p5:chr19\",\n \"GRCh38.p6:19\",\n \"GRCh38.p6:chr19\",\n
\ \"GRCh38.p7:19\",\n \"GRCh38.p7:chr19\",\n \"GRCh38.p8:19\",\n \"GRCh38.p8:chr19\",\n
\ \"GRCh38.p9:19\",\n \"GRCh38.p9:chr19\",\n \"MD5:b0eba2c7bb5c953d1e06a508b5e487de\",\n
\ \"NCBI:NC_000019.10\",\n \"refseq:NC_000019.10\",\n \"SEGUID:AHxM5/L8jIX08UhBBkKXkiO5rhY\",\n
\ \"SHA1:007c4ce7f2fc8c85f4f148410642979223b9ae16\",\n \"VMC:GS_IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\",\n
\ \"sha512t24u:IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\",\n \"ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\"\n
\ ],\n \"alphabet\": \"ACGNT\",\n \"length\": 58617616\n}\n"
headers:
Connection:
- close
Content-Length:
- '1035'
Content-Type:
- application/json
Date:
- Tue, 12 Mar 2024 12:34:06 GMT
Server:
- Werkzeug/2.2.2 Python/3.10.4
status:
code: 200
message: OK
- request:
body: null
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
User-Agent:
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/metadata/refseq:NC_000013.11
response:
body:
string: "{\n \"added\": \"2016-08-27T23:50:14Z\",\n \"aliases\": [\n \"GRCh38:13\",\n
\ \"GRCh38:chr13\",\n \"GRCh38.p1:13\",\n \"GRCh38.p1:chr13\",\n \"GRCh38.p10:13\",\n
\ \"GRCh38.p10:chr13\",\n \"GRCh38.p11:13\",\n \"GRCh38.p11:chr13\",\n
\ \"GRCh38.p12:13\",\n \"GRCh38.p12:chr13\",\n \"GRCh38.p2:13\",\n
\ \"GRCh38.p2:chr13\",\n \"GRCh38.p3:13\",\n \"GRCh38.p3:chr13\",\n
\ \"GRCh38.p4:13\",\n \"GRCh38.p4:chr13\",\n \"GRCh38.p5:13\",\n \"GRCh38.p5:chr13\",\n
\ \"GRCh38.p6:13\",\n \"GRCh38.p6:chr13\",\n \"GRCh38.p7:13\",\n \"GRCh38.p7:chr13\",\n
\ \"GRCh38.p8:13\",\n \"GRCh38.p8:chr13\",\n \"GRCh38.p9:13\",\n \"GRCh38.p9:chr13\",\n
\ \"MD5:a5437debe2ef9c9ef8f3ea2874ae1d82\",\n \"NCBI:NC_000013.11\",\n
\ \"refseq:NC_000013.11\",\n \"SEGUID:2oDBty0yKV9wHo7gg+Bt+fPgi5o\",\n
\ \"SHA1:da80c1b72d32295f701e8ee083e06df9f3e08b9a\",\n \"VMC:GS__0wi-qoDrvram155UmcSC-zA5ZK4fpLT\",\n
\ \"sha512t24u:_0wi-qoDrvram155UmcSC-zA5ZK4fpLT\",\n \"ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT\"\n
\ ],\n \"alphabet\": \"ACGKNTY\",\n \"length\": 114364328\n}\n"
headers:
Connection:
- close
Content-Length:
- '1002'
Content-Type:
- application/json
Date:
- Tue, 12 Mar 2024 12:34:06 GMT
Server:
- Werkzeug/2.2.2 Python/3.10.4
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ interactions:
Connection:
- keep-alive
User-Agent:
- python-requests/2.28.2
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181219&end=55181220
response:
Expand All @@ -23,7 +23,7 @@ interactions:
Content-Type:
- text/plain; charset=utf-8
Date:
- Mon, 19 Feb 2024 07:31:18 GMT
- Mon, 11 Mar 2024 21:21:23 GMT
Server:
- Werkzeug/2.2.2 Python/3.10.4
status:
Expand All @@ -39,7 +39,7 @@ interactions:
Connection:
- keep-alive
User-Agent:
- python-requests/2.28.2
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181218&end=55181219
response:
Expand All @@ -53,7 +53,7 @@ interactions:
Content-Type:
- text/plain; charset=utf-8
Date:
- Mon, 19 Feb 2024 07:31:18 GMT
- Mon, 11 Mar 2024 21:21:23 GMT
Server:
- Werkzeug/2.2.2 Python/3.10.4
status:
Expand All @@ -69,7 +69,7 @@ interactions:
Connection:
- keep-alive
User-Agent:
- python-requests/2.28.2
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181220&end=55181221
response:
Expand All @@ -83,7 +83,7 @@ interactions:
Content-Type:
- text/plain; charset=utf-8
Date:
- Mon, 19 Feb 2024 07:31:18 GMT
- Mon, 11 Mar 2024 21:21:23 GMT
Server:
- Werkzeug/2.2.2 Python/3.10.4
status:
Expand All @@ -99,7 +99,7 @@ interactions:
Connection:
- keep-alive
User-Agent:
- python-requests/2.28.2
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181219&end=55181219
response:
Expand All @@ -113,7 +113,7 @@ interactions:
Content-Type:
- text/plain; charset=utf-8
Date:
- Mon, 19 Feb 2024 07:31:18 GMT
- Mon, 11 Mar 2024 21:21:23 GMT
Server:
- Werkzeug/2.2.2 Python/3.10.4
status:
Expand All @@ -129,7 +129,7 @@ interactions:
Connection:
- keep-alive
User-Agent:
- python-requests/2.28.2
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181220&end=55181220
response:
Expand All @@ -143,7 +143,7 @@ interactions:
Content-Type:
- text/plain; charset=utf-8
Date:
- Mon, 19 Feb 2024 07:31:18 GMT
- Mon, 11 Mar 2024 21:21:23 GMT
Server:
- Werkzeug/2.2.2 Python/3.10.4
status:
Expand All @@ -159,7 +159,7 @@ interactions:
Connection:
- keep-alive
User-Agent:
- python-requests/2.28.2
- python-requests/2.31.0
method: GET
uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NC_000007.14&rettype=fasta&seq_start=55181220&seq_stop=55181220&tool=bioutils&email=biocommons-dev@googlegroups.com
response:
Expand All @@ -183,20 +183,20 @@ interactions:
Content-Type:
- text/plain
Date:
- Mon, 19 Feb 2024 07:31:18 GMT
- Mon, 11 Mar 2024 21:21:23 GMT
Keep-Alive:
- timeout=4, max=40
NCBI-PHID:
- 322CF705C067C1A500004E8953DC8714.1.1.m_5
- 322CFCD26EC09885000045491929D908.1.1.m_5
NCBI-SID:
- F908C0590198759C_1297SID
- CC70304937911EDE_9EBESID
Referrer-Policy:
- origin-when-cross-origin
Server:
- Finatra
Set-Cookie:
- ncbi_sid=F908C0590198759C_1297SID; domain=.nih.gov; path=/; expires=Wed, 19
Feb 2025 07:31:18 GMT
- ncbi_sid=CC70304937911EDE_9EBESID; domain=.nih.gov; path=/; expires=Tue, 11
Mar 2025 21:21:23 GMT
Strict-Transport-Security:
- max-age=31536000; includeSubDomains; preload
Transfer-Encoding:
Expand Down Expand Up @@ -224,7 +224,7 @@ interactions:
Connection:
- keep-alive
User-Agent:
- python-requests/2.28.2
- python-requests/2.31.0
method: GET
uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NC_000007.14&rettype=fasta&seq_start=55181220&seq_stop=55181240&tool=bioutils&email=biocommons-dev@googlegroups.com
response:
Expand All @@ -249,20 +249,20 @@ interactions:
Content-Type:
- text/plain
Date:
- Mon, 19 Feb 2024 07:31:18 GMT
- Mon, 11 Mar 2024 21:21:24 GMT
Keep-Alive:
- timeout=4, max=40
NCBI-PHID:
- 322CF705C067C1A500003B8959035020.1.1.m_5
- 322CFCD26EC09885000059491D137F58.1.1.m_5
NCBI-SID:
- 0FA55D02B01B7AFC_8F66SID
- 11F6059A8AEDFE53_CBBESID
Referrer-Policy:
- origin-when-cross-origin
Server:
- Finatra
Set-Cookie:
- ncbi_sid=0FA55D02B01B7AFC_8F66SID; domain=.nih.gov; path=/; expires=Wed, 19
Feb 2025 07:31:19 GMT
- ncbi_sid=11F6059A8AEDFE53_CBBESID; domain=.nih.gov; path=/; expires=Tue, 11
Mar 2025 21:21:24 GMT
Strict-Transport-Security:
- max-age=31536000; includeSubDomains; preload
Transfer-Encoding:
Expand Down
Loading

0 comments on commit cef704d

Please sign in to comment.