Skip to content

Commit

Permalink
cleanup: resolve pylint warnings (#149)
Browse files Browse the repository at this point in the history
* Resolved easy pylint warnings
* For those that had `too-many` warnings, I just disabled
* Disable `fixme` for pylint
- Name changes for variable names / imports
  • Loading branch information
korikuzma authored Mar 8, 2023
1 parent 64a8063 commit c9880ef
Show file tree
Hide file tree
Showing 13 changed files with 106 additions and 74 deletions.
10 changes: 8 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pycqa/pylint
rev: v2.15.9
- repo: local
hooks:
- id: pylint
name: pylint
entry: pylint
language: system
types: [python]
require_serial: true
exclude: ^tests/
args:
- src/ga4gh/core
- src/ga4gh/vrs
- --fail-under=10
12 changes: 12 additions & 0 deletions .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,18 @@ max-line-length=120
# Maximum number of lines in a module.
max-module-lines=1000

[MESSAGES CONTROL]

# Disable the message, report, category or checker with the given id(s). You
# can either give multiple identifiers separated by comma (,) or put this
# option multiple times (only on the command line, not in the configuration
# file where it should appear only once). You can also use "--disable=all" to
# disable everything first and then re-enable specific checks. For example, if
# you want to run only the similarities checker, you can use "--disable=all
# --enable=similarities". If you want to run only the classes checker, but have
# no Warning level messages displayed, use "--disable=all --enable=classes
# --disable=W".
disable=fixme,protected-access

[STRING]

Expand Down
2 changes: 1 addition & 1 deletion src/ga4gh/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
parse_ga4gh_identifier)
from ._internal.jsonschema import (build_models, build_class_referable_attribute_map, is_pjs_class, is_pjs_instance,
is_curie, is_identifiable, is_literal, pjs_copy)

# pylint: disable=duplicate-code
try:
__version__ = get_distribution(__name__).version
except DistributionNotFound: # pragma: nocover
Expand Down
7 changes: 4 additions & 3 deletions src/ga4gh/core/_internal/identifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@

# Assume that ga4gh.yaml and vrs.yaml files are in the same directory for now
schema_dir = os.environ.get("VRS_SCHEMA_DIR", pkg_resources.resource_filename(__name__, "data/schema"))
cfg = yaml.safe_load(open(schema_dir + "/ga4gh.yaml"))
with open(schema_dir + "/ga4gh.yaml", "r", encoding="utf-8") as f:
cfg = yaml.safe_load(f)
type_prefix_map_default = cfg["identifiers"]["type_prefix_map"]
namespace = cfg["identifiers"]["namespace"]
curie_sep = cfg["identifiers"]["curie_sep"]
Expand Down Expand Up @@ -144,7 +145,7 @@ def ga4gh_serialize(vro):
"""

def dictify(vro, enref=True):
def dictify(vro, enref=True): # pylint: disable=too-many-return-statements,too-many-branches
"""recursively converts (any) object to dictionary prior to
serialization
Expand Down Expand Up @@ -184,7 +185,7 @@ def dictify(vro, enref=True):
if is_curie_type(vro[0]):
return sorted(dictify(o) for o in vro.data)

arr = list()
arr = []
for o in vro.typed_elems:
d = dictify(o)
if isinstance(d, dict):
Expand Down
2 changes: 1 addition & 1 deletion src/ga4gh/core/_internal/jsonschema.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

def build_models(path, standardize_names=False):
"""load models from json schema at path"""
with open(path, "r") as yaml_file:
with open(path, "r", encoding="utf-8") as yaml_file:
y = yaml.load(yaml_file, yaml.SafeLoader)
builder = pjs.ObjectBuilder(pjs_filter(y))
models = builder.build_classes(standardize_names=standardize_names)
Expand Down
2 changes: 1 addition & 1 deletion src/ga4gh/vrs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@

logging.basicConfig(
filename="vrs-python.log",
format="[%(asctime)s] - %(name)s - %(levelname)s : %(message)s")
format="[%(asctime)s] - %(name)s - %(levelname)s : %(message)s")
4 changes: 2 additions & 2 deletions src/ga4gh/vrs/dataproxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def _get_sequence(self, identifier, start=None, end=None):
url = self.base_url + f"sequence/{identifier}"
_logger.info("Fetching %s", url)
params = {"start": start, "end": end}
resp = requests.get(url, params=params)
resp = requests.get(url, params=params, timeout=5)
if resp.status_code == 404:
raise KeyError(identifier)
resp.raise_for_status()
Expand All @@ -161,7 +161,7 @@ def _get_sequence(self, identifier, start=None, end=None):
def _get_metadata(self, identifier):
url = self.base_url + f"metadata/{identifier}"
_logger.info("Fetching %s", url)
resp = requests.get(url)
resp = requests.get(url, timeout=5)
if resp.status_code == 404:
raise KeyError(identifier)
resp.raise_for_status()
Expand Down
47 changes: 29 additions & 18 deletions src/ga4gh/vrs/extras/localizer.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
"""convert named locations into SequenceLocations ("localize") by
reference to external data
"""convert named locations into SequenceLocations ("localize") by reference to external
data
"""

import copy

from bioutils.accessions import coerce_namespace
from bioutils.assemblies import make_name_ac_map
from bioutils.cytobands import get_cytoband_maps
Expand Down Expand Up @@ -45,27 +41,25 @@ def __init__(self):
self._ana_maps = {k: make_name_ac_map(k) for k in assy_name_to_map_name}


def localize_allele(self, allele):
def localize_allele(self, allele, assembly_name = "GRCh38"):
"""Converts VRS Allele's named features to sequence location"""
# copy input variant and replace location
# N.B. deepcopy leads to recursion errors
allele_sl = ga4gh.vrs.models.Variation(**allele.as_dict())
del allele_sl._id
allele_sl.location = self.localize(allele.location)
allele_sl.location = self.localize_named_feature(allele.location, assembly_name)
return allele_sl


def localize_named_feature(self, loc, assembly_name):
"""converts named features to sequence locations
"""

"""converts named features to sequence locations"""
assert loc.type._value == "ChromosomeLocation", "Expected a ChromosomeLocation object"

def _get_coords(m, cb):
"""return (start,end) of band `cb` in map `m`"""
if cb is None:
return None
return chr_cb_map[cb][0:2]
return m[cb][0:2]

try:
map_name = assy_name_to_map_name[assembly_name]
Expand Down Expand Up @@ -98,7 +92,7 @@ def _get_coords(m, cb):
try:
ac = self._ana_maps[assembly_name][loc.chr]
except KeyError as e:
raise ValueError(f"No accssion for {loc.chr} in assembly {assembly_name}") from e
raise ValueError(f"No accession for {loc.chr} in assembly {assembly_name}") from e

return ga4gh.vrs.models.SequenceLocation(
sequence_id=coerce_namespace(ac),
Expand All @@ -108,9 +102,26 @@ def _get_coords(m, cb):
)





if __name__ == "__main__":
    # Ad-hoc demo: localize a cytoband-defined Allele to a SequenceLocation.
    # (Constructed ChromosomeLocation exercises the models import; the demo
    # proper runs on the dict-built Allele below.)
    chrom_loc = ga4gh.vrs.models.ChromosomeLocation(chr="11", start="q22.3", end="q23.1")
    localizer = Localizer()

    # Allele with a named (cytoband) location rather than sequence coordinates.
    raw_allele = {
        "location": {
            "chr":"19",
            "interval": {
                "end": "q13.32",
                "start": "q13.32",
                "type": "CytobandInterval"
            },
            "species_id":"taxonomy:9606",
            "type":"ChromosomeLocation"
        },
        "state": {
            "sequence": "T",
            "type": "LiteralSequenceExpression"
        },
        "type": "Allele"
    }
    allele_obj = ga4gh.vrs.models.Allele(**raw_allele)
    localized = localizer.localize_allele(allele_obj)
    print("Allele with SequenceLocation:", localized.as_dict())
45 changes: 23 additions & 22 deletions src/ga4gh/vrs/extras/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import hgvs.dataproviders.uta

from ga4gh.core import ga4gh_identify
from ga4gh.vrs import models, normalize
from ga4gh.vrs import models, normalize as do_normalize
from ga4gh.vrs.extras.decorators import lazy_property # this should be relocated
from ga4gh.vrs.utils.hgvs_tools import HgvsTools

Expand All @@ -28,8 +28,6 @@
class ValidationError(Exception):
"""Class for validation errors during translation"""

pass


class Translator:
"""Translates various variation formats to and from GA4GH VRS models
Expand All @@ -48,7 +46,7 @@ class Translator:
spdi_re = re.compile(r"(?P<ac>[^:]+):(?P<pos>\d+):(?P<del_len_or_seq>\w*):(?P<ins_seq>\w*)")


def __init__(self,
def __init__(self, # pylint: disable=too-many-arguments
data_proxy,
default_assembly_name="GRCh38",
translate_sequence_identifiers=True,
Expand Down Expand Up @@ -82,7 +80,7 @@ def translate_from(self, var, fmt=None, require_validation=True):
raise ValueError(f"Unable to parse data as {fmt} variation")
return o

for fmt, t in self.from_translators.items():
for _, t in self.from_translators.items():
o = t(self, var, require_validation=require_validation)
if o:
return o
Expand Down Expand Up @@ -116,7 +114,7 @@ def ensure_allele_is_latest_model(self, allele):
############################################################################
## INTERNAL

def _from_beacon(self, beacon_expr, assembly_name=None, require_validation=True):
def _from_beacon(self, beacon_expr, assembly_name=None, require_validation=True): # pylint: disable=too-many-locals, unused-argument
"""Parse beacon expression into VRS Allele
#>>> a = tlr.from_beacon("19 : 44908822 C > T")
Expand Down Expand Up @@ -158,7 +156,7 @@ def _from_beacon(self, beacon_expr, assembly_name=None, require_validation=True)
return allele


def _from_gnomad(self, gnomad_expr, assembly_name=None, require_validation=True):
def _from_gnomad(self, gnomad_expr, assembly_name=None, require_validation=True): # pylint: disable=unused-argument, too-many-locals
"""Parse gnomAD-style VCF expression into VRS Allele
:param str gnomad_expr: chr-pos-ref-alt
Expand Down Expand Up @@ -208,7 +206,7 @@ def _from_gnomad(self, gnomad_expr, assembly_name=None, require_validation=True)
return allele


def _from_hgvs(self, hgvs_expr, require_validation=True):
def _from_hgvs(self, hgvs_expr, require_validation=True): # pylint: disable=unused-argument
"""parse hgvs into a VRS object (typically an Allele)
#>>> a = tlr.from_hgvs("NM_012345.6:c.22A>T")
Expand All @@ -233,7 +231,7 @@ def _from_hgvs(self, hgvs_expr, require_validation=True):
if not self.hgvs_re.match(hgvs_expr):
return None

sv = self._hgvs_parser.parse_hgvs_variant(hgvs_expr)
sv = self._hgvs_parser.parse_hgvs_variant(hgvs_expr) # pylint: disable=no-member

# prefix accession with namespace
sequence_id = coerce_namespace(sv.ac)
Expand Down Expand Up @@ -277,7 +275,7 @@ def _from_hgvs(self, hgvs_expr, require_validation=True):
return allele


def _from_spdi(self, spdi_expr, require_validation=True):
def _from_spdi(self, spdi_expr, require_validation=True): # pylint: disable=unused-argument

"""Parse SPDI expression in to a GA4GH Allele
Expand Down Expand Up @@ -322,7 +320,7 @@ def _from_spdi(self, spdi_expr, require_validation=True):
return allele


def _from_vrs(self, var, require_validation=True):
def _from_vrs(self, var, require_validation=True): # pylint: disable=unused-argument
"""convert from dict representation of VRS JSON to VRS object"""
if not isinstance(var, Mapping):
return None
Expand All @@ -342,7 +340,7 @@ def _get_hgvs_tools(self):
self.hgvs_tools = HgvsTools(uta_conn)
return self.hgvs_tools

def _to_hgvs(self, vo, namespace="refseq"):
def _to_hgvs(self, vo, namespace="refseq"): # pylint: disable=too-many-locals
"""generates a *list* of HGVS expressions for VRS Allele.
If `namespace` is not None, returns HGVS strings for the
Expand Down Expand Up @@ -385,11 +383,11 @@ def ir_stype(a):
stype = stypes[0]

# build interval and edit depending on sequence type
if stype == "p":
if stype == "p": # pylint: disable=no-else-raise
raise ValueError("Only nucleic acid variation is currently supported")
# ival = hgvs.location.Interval(start=start, end=end)
# edit = hgvs.edit.AARefAlt(ref=None, alt=vo.state.sequence)
else: # pylint: disable=no-else-raise
else:
start, end = vo.location.interval.start.value, vo.location.interval.end.value
# ib: 0 1 2 3 4 5
# h: 1 2 3 4 5
Expand Down Expand Up @@ -421,7 +419,7 @@ def ir_stype(a):
if ns.startswith("GRC") and namespace is None:
continue

if not (any(a.startswith(pfx) for pfx in ("NM", "NP", "NC", "NG"))):
if not any(a.startswith(pfx) for pfx in ("NM", "NP", "NC", "NG")):
continue

var.ac = a
Expand All @@ -435,7 +433,7 @@ def ir_stype(a):

hgvs_exprs += [str(var)]
except hgvs.exceptions.HGVSDataNotAvailableError:
_logger.warning(f"No data found for accession {a}")
_logger.warning("No data found for accession %s", a)

return list(set(hgvs_exprs))

Expand Down Expand Up @@ -493,6 +491,9 @@ def _is_valid_ref_seq(self, sequence_id: str, start_pos: int, end_pos: int,


def is_valid_allele(self, vo):
"""Ensure that `vo` is a valid VRS Allele with SequenceLocation and
LiteralSequenceExpression
"""
return (vo.type == "Allele"
and vo.location.type == "SequenceLocation"
and vo.state.type == "LiteralSequenceExpression")
Expand All @@ -515,7 +516,7 @@ def _post_process_imported_allele(self, allele):
allele.location.sequence_id = seq_id

if self.normalize:
allele = normalize(allele, self.data_proxy)
allele = do_normalize(allele, self.data_proxy)

if self.identify:
allele._id = ga4gh_identify(allele)
Expand Down Expand Up @@ -583,16 +584,16 @@ def _seq_id_mapper(self, ir):
"type": "SequenceInterval"
}
]
formats = ["hgvs", "gnomad", "beacon", "spdi", "vrs", None]
from_formats = ["hgvs", "gnomad", "beacon", "spdi", "vrs", None]

for e in expressions:
print(f"* {e}")
for f in formats:
for f in from_formats:
try:
o = tlr.translate_from(e, f)
r = o.type
vrs_obj = tlr.translate_from(e, f)
r = vrs_obj.type
except ValueError:
r = "-"
except Exception as ex:
except Exception as ex: # pylint: disable=broad-except
r = ex.__class__.__name__
print(f" {f}: {r}")
6 changes: 3 additions & 3 deletions src/ga4gh/vrs/extras/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,19 +35,19 @@ def _format_time(timespan, precision=3):
value = int(leftover / length)
if value > 0:
leftover = leftover % length
time.append(u"%s%s" % (str(value), suffix))
time.append(f"{value}{suffix}")
if leftover < 1:
break
return " ".join(time)

units = [u"s", u"ms", u"us", u"ns"] # the save value
units = ["s", "ms", "us", "ns"] # the save value
scaling = [1, 1e3, 1e6, 1e9]

if timespan > 0.0:
order = min(-int(math.floor(math.log10(timespan)) // 3), 3)
else:
order = 3
return u"%.*g %s" % (precision, timespan * scaling[order], units[order])
return f"{timespan * scaling[order]:.{precision}g} {units[order]}"


def hex_to_base64url(s):
Expand Down
Loading

0 comments on commit c9880ef

Please sign in to comment.