Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: parameterize normalize in translator #361

Merged
merged 2 commits into from
Mar 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 17 additions & 7 deletions src/ga4gh/vrs/extras/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,12 @@ def __init__(
self,
data_proxy,
default_assembly_name="GRCh38",
normalize=True,
identify=True,
rle_seq_limit: Optional[int] = 50
):
self.default_assembly_name = default_assembly_name
self.data_proxy = data_proxy
self.identify = identify
self.normalize = normalize
self.rle_seq_limit = rle_seq_limit
self.hgvs_tools = None
self.from_translators = {}
Expand Down Expand Up @@ -127,6 +125,8 @@ def translate_from(self, var, fmt=None, **kwargs):
To exclude `sequence` from the response, set to 0.
For no limit, set to `None`.
Default value is set in the instance variable `rle_seq_limit`.
do_normalize (bool): `True` if fully justified normalization should be
performed, `False` otherwise. Defaults to `True`.
"""
if fmt:
try:
Expand Down Expand Up @@ -225,10 +225,10 @@ class AlleleTranslator(Translator):
"""Class for translating formats to and from VRS Alleles"""

def __init__(
self, data_proxy, default_assembly_name="GRCh38", normalize=True, identify=True
self, data_proxy, default_assembly_name="GRCh38", identify=True
):
"""Initialize AlleleTranslator class"""
super().__init__(data_proxy, default_assembly_name, normalize, identify)
super().__init__(data_proxy, default_assembly_name, identify)

self.from_translators = {
"beacon": self._from_beacon,
Expand All @@ -253,6 +253,8 @@ def _from_beacon(self, beacon_expr, **kwargs):
To exclude `sequence` from the response, set to 0.
For no limit, set to `None`.
Default value is set in the instance variable `rle_seq_limit`.
do_normalize (bool): `True` if fully justified normalization should be
performed, `False` otherwise. Defaults to `True`.

#>>> a = tlr.from_beacon("19 : 44908822 C > T")
#>>> a.model_dump()
Expand Down Expand Up @@ -316,6 +318,8 @@ def _from_gnomad(self, gnomad_expr, **kwargs):
To exclude `sequence` from the response, set to 0.
For no limit, set to `None`.
Default value is set in the instance variable `rle_seq_limit`.
do_normalize (bool): `True` if fully justified normalization should be
performed, `False` otherwise. Defaults to `True`.

#>>> a = tlr.from_gnomad("1-55516888-G-GA")
#>>> a.model_dump()
Expand Down Expand Up @@ -378,6 +382,8 @@ def _from_hgvs(self, hgvs_expr: str, **kwargs):
To exclude `sequence` from the response, set to 0.
For no limit, set to `None`.
Default value is set in the instance variable `rle_seq_limit`.
do_normalize (bool): `True` if fully justified normalization should be
performed, `False` otherwise. Defaults to `True`.

#>>> a = tlr.from_hgvs("NC_000007.14:g.55181320A>T")
#>>> a.model_dump()
Expand Down Expand Up @@ -452,6 +458,8 @@ def _from_spdi(self, spdi_expr, **kwargs):
To exclude `sequence` from the response, set to 0.
For no limit, set to `None`.
Default value is set in the instance variable `rle_seq_limit`.
do_normalize (bool): `True` if fully justified normalization should be
performed, `False` otherwise. Defaults to `True`.

#>>> a = tlr.from_spdi("NC_000013.11:32936731:1:C")
#>>> a.model_dump()
Expand Down Expand Up @@ -626,8 +634,10 @@ def _post_process_imported_allele(self, allele, **kwargs):
normalization, this sets the limit for the length of the `sequence`.
To exclude `sequence` from the response, set to 0.
For no limit, set to `None`.
do_normalize (bool): `True` if fully justified normalization should be
performed, `False` otherwise. Defaults to `True`.
"""
if self.normalize:
if kwargs.get("do_normalize", True):
allele = normalize(
allele,
self.data_proxy,
Expand All @@ -654,10 +664,10 @@ class CnvTranslator(Translator):
"""Class for translating from other formats to VRS Copy Number"""

def __init__(
self, data_proxy, default_assembly_name="GRCh38", normalize=True, identify=True
self, data_proxy, default_assembly_name="GRCh38", identify=True
):
"""Initialize CnvTranslator class"""
super().__init__(data_proxy, default_assembly_name, normalize, identify)
super().__init__(data_proxy, default_assembly_name, identify)
self.from_translators = {
"hgvs": self._from_hgvs,
}
Expand Down
86 changes: 86 additions & 0 deletions tests/extras/cassettes/test_from_spdi.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
interactions:
- request:
body: null
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
User-Agent:
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/metadata/refseq:NC_000019.10
response:
body:
string: "{\n \"added\": \"2016-08-24T08:19:02Z\",\n \"aliases\": [\n \"Ensembl:19\",\n
\ \"ensembl:19\",\n \"GRCh38:19\",\n \"GRCh38:chr19\",\n \"GRCh38.p1:19\",\n
\ \"GRCh38.p1:chr19\",\n \"GRCh38.p10:19\",\n \"GRCh38.p10:chr19\",\n
\ \"GRCh38.p11:19\",\n \"GRCh38.p11:chr19\",\n \"GRCh38.p12:19\",\n
\ \"GRCh38.p12:chr19\",\n \"GRCh38.p2:19\",\n \"GRCh38.p2:chr19\",\n
\ \"GRCh38.p3:19\",\n \"GRCh38.p3:chr19\",\n \"GRCh38.p4:19\",\n \"GRCh38.p4:chr19\",\n
\ \"GRCh38.p5:19\",\n \"GRCh38.p5:chr19\",\n \"GRCh38.p6:19\",\n \"GRCh38.p6:chr19\",\n
\ \"GRCh38.p7:19\",\n \"GRCh38.p7:chr19\",\n \"GRCh38.p8:19\",\n \"GRCh38.p8:chr19\",\n
\ \"GRCh38.p9:19\",\n \"GRCh38.p9:chr19\",\n \"MD5:b0eba2c7bb5c953d1e06a508b5e487de\",\n
\ \"NCBI:NC_000019.10\",\n \"refseq:NC_000019.10\",\n \"SEGUID:AHxM5/L8jIX08UhBBkKXkiO5rhY\",\n
\ \"SHA1:007c4ce7f2fc8c85f4f148410642979223b9ae16\",\n \"VMC:GS_IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\",\n
\ \"sha512t24u:IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\",\n \"ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\"\n
\ ],\n \"alphabet\": \"ACGNT\",\n \"length\": 58617616\n}\n"
headers:
Connection:
- close
Content-Length:
- '1035'
Content-Type:
- application/json
Date:
- Tue, 12 Mar 2024 12:34:06 GMT
Server:
- Werkzeug/2.2.2 Python/3.10.4
status:
code: 200
message: OK
- request:
body: null
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
User-Agent:
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/metadata/refseq:NC_000013.11
response:
body:
string: "{\n \"added\": \"2016-08-27T23:50:14Z\",\n \"aliases\": [\n \"GRCh38:13\",\n
\ \"GRCh38:chr13\",\n \"GRCh38.p1:13\",\n \"GRCh38.p1:chr13\",\n \"GRCh38.p10:13\",\n
\ \"GRCh38.p10:chr13\",\n \"GRCh38.p11:13\",\n \"GRCh38.p11:chr13\",\n
\ \"GRCh38.p12:13\",\n \"GRCh38.p12:chr13\",\n \"GRCh38.p2:13\",\n
\ \"GRCh38.p2:chr13\",\n \"GRCh38.p3:13\",\n \"GRCh38.p3:chr13\",\n
\ \"GRCh38.p4:13\",\n \"GRCh38.p4:chr13\",\n \"GRCh38.p5:13\",\n \"GRCh38.p5:chr13\",\n
\ \"GRCh38.p6:13\",\n \"GRCh38.p6:chr13\",\n \"GRCh38.p7:13\",\n \"GRCh38.p7:chr13\",\n
\ \"GRCh38.p8:13\",\n \"GRCh38.p8:chr13\",\n \"GRCh38.p9:13\",\n \"GRCh38.p9:chr13\",\n
\ \"MD5:a5437debe2ef9c9ef8f3ea2874ae1d82\",\n \"NCBI:NC_000013.11\",\n
\ \"refseq:NC_000013.11\",\n \"SEGUID:2oDBty0yKV9wHo7gg+Bt+fPgi5o\",\n
\ \"SHA1:da80c1b72d32295f701e8ee083e06df9f3e08b9a\",\n \"VMC:GS__0wi-qoDrvram155UmcSC-zA5ZK4fpLT\",\n
\ \"sha512t24u:_0wi-qoDrvram155UmcSC-zA5ZK4fpLT\",\n \"ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT\"\n
\ ],\n \"alphabet\": \"ACGKNTY\",\n \"length\": 114364328\n}\n"
headers:
Connection:
- close
Content-Length:
- '1002'
Content-Type:
- application/json
Date:
- Tue, 12 Mar 2024 12:34:06 GMT
Server:
- Werkzeug/2.2.2 Python/3.10.4
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ interactions:
Connection:
- keep-alive
User-Agent:
- python-requests/2.28.2
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181219&end=55181220
response:
Expand All @@ -23,7 +23,7 @@ interactions:
Content-Type:
- text/plain; charset=utf-8
Date:
- Mon, 19 Feb 2024 07:31:18 GMT
- Mon, 11 Mar 2024 21:21:23 GMT
Server:
- Werkzeug/2.2.2 Python/3.10.4
status:
Expand All @@ -39,7 +39,7 @@ interactions:
Connection:
- keep-alive
User-Agent:
- python-requests/2.28.2
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181218&end=55181219
response:
Expand All @@ -53,7 +53,7 @@ interactions:
Content-Type:
- text/plain; charset=utf-8
Date:
- Mon, 19 Feb 2024 07:31:18 GMT
- Mon, 11 Mar 2024 21:21:23 GMT
Server:
- Werkzeug/2.2.2 Python/3.10.4
status:
Expand All @@ -69,7 +69,7 @@ interactions:
Connection:
- keep-alive
User-Agent:
- python-requests/2.28.2
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181220&end=55181221
response:
Expand All @@ -83,7 +83,7 @@ interactions:
Content-Type:
- text/plain; charset=utf-8
Date:
- Mon, 19 Feb 2024 07:31:18 GMT
- Mon, 11 Mar 2024 21:21:23 GMT
Server:
- Werkzeug/2.2.2 Python/3.10.4
status:
Expand All @@ -99,7 +99,7 @@ interactions:
Connection:
- keep-alive
User-Agent:
- python-requests/2.28.2
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181219&end=55181219
response:
Expand All @@ -113,7 +113,7 @@ interactions:
Content-Type:
- text/plain; charset=utf-8
Date:
- Mon, 19 Feb 2024 07:31:18 GMT
- Mon, 11 Mar 2024 21:21:23 GMT
Server:
- Werkzeug/2.2.2 Python/3.10.4
status:
Expand All @@ -129,7 +129,7 @@ interactions:
Connection:
- keep-alive
User-Agent:
- python-requests/2.28.2
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181220&end=55181220
response:
Expand All @@ -143,7 +143,7 @@ interactions:
Content-Type:
- text/plain; charset=utf-8
Date:
- Mon, 19 Feb 2024 07:31:18 GMT
- Mon, 11 Mar 2024 21:21:23 GMT
Server:
- Werkzeug/2.2.2 Python/3.10.4
status:
Expand All @@ -159,7 +159,7 @@ interactions:
Connection:
- keep-alive
User-Agent:
- python-requests/2.28.2
- python-requests/2.31.0
method: GET
uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NC_000007.14&rettype=fasta&seq_start=55181220&seq_stop=55181220&tool=bioutils&email=biocommons-dev@googlegroups.com
response:
Expand All @@ -183,20 +183,20 @@ interactions:
Content-Type:
- text/plain
Date:
- Mon, 19 Feb 2024 07:31:18 GMT
- Mon, 11 Mar 2024 21:21:23 GMT
Keep-Alive:
- timeout=4, max=40
NCBI-PHID:
- 322CF705C067C1A500004E8953DC8714.1.1.m_5
- 322CFCD26EC09885000045491929D908.1.1.m_5
NCBI-SID:
- F908C0590198759C_1297SID
- CC70304937911EDE_9EBESID
Referrer-Policy:
- origin-when-cross-origin
Server:
- Finatra
Set-Cookie:
- ncbi_sid=F908C0590198759C_1297SID; domain=.nih.gov; path=/; expires=Wed, 19
Feb 2025 07:31:18 GMT
- ncbi_sid=CC70304937911EDE_9EBESID; domain=.nih.gov; path=/; expires=Tue, 11
Mar 2025 21:21:23 GMT
Strict-Transport-Security:
- max-age=31536000; includeSubDomains; preload
Transfer-Encoding:
Expand Down Expand Up @@ -224,7 +224,7 @@ interactions:
Connection:
- keep-alive
User-Agent:
- python-requests/2.28.2
- python-requests/2.31.0
method: GET
uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NC_000007.14&rettype=fasta&seq_start=55181220&seq_stop=55181240&tool=bioutils&email=biocommons-dev@googlegroups.com
response:
Expand All @@ -249,20 +249,20 @@ interactions:
Content-Type:
- text/plain
Date:
- Mon, 19 Feb 2024 07:31:18 GMT
- Mon, 11 Mar 2024 21:21:24 GMT
Keep-Alive:
- timeout=4, max=40
NCBI-PHID:
- 322CF705C067C1A500003B8959035020.1.1.m_5
- 322CFCD26EC09885000059491D137F58.1.1.m_5
NCBI-SID:
- 0FA55D02B01B7AFC_8F66SID
- 11F6059A8AEDFE53_CBBESID
Referrer-Policy:
- origin-when-cross-origin
Server:
- Finatra
Set-Cookie:
- ncbi_sid=0FA55D02B01B7AFC_8F66SID; domain=.nih.gov; path=/; expires=Wed, 19
Feb 2025 07:31:19 GMT
- ncbi_sid=11F6059A8AEDFE53_CBBESID; domain=.nih.gov; path=/; expires=Tue, 11
Mar 2025 21:21:24 GMT
Strict-Transport-Security:
- max-age=31536000; includeSubDomains; preload
Transfer-Encoding:
Expand Down
Loading
Loading