Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

cleanup: add pre-commit + resolve pylint warnings #210

Merged
merged 4 commits into from
Jul 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: local
hooks:
- id: pylint
name: pylint
entry: pylint
language: system
types: [python]
require_serial: true
exclude: ^tests/
args:
- src/ga4gh/core
- src/ga4gh/vrs
- --fail-under=10
16 changes: 14 additions & 2 deletions .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,18 @@ max-line-length=120
# Maximum number of lines in a module.
max-module-lines=1000

[MESSAGES CONTROL]

# Disable the message, report, category or checker with the given id(s). You
# can either give multiple identifiers separated by comma (,) or put this
# option multiple times (only on the command line, not in the configuration
# file where it should appear only once). You can also use "--disable=all" to
# disable everything first and then re-enable specific checks. For example, if
# you want to run only the similarities checker, you can use "--disable=all
# --enable=similarities". If you want to run only the classes checker, but have
# no Warning level messages displayed, use "--disable=all --enable=classes
# --disable=W".
disable=fixme,protected-access,too-few-public-methods

[STRING]

Expand Down Expand Up @@ -138,5 +150,5 @@ min-public-methods=2

# Exceptions that will emit a warning when being caught. Defaults to
# "BaseException, Exception".
overgeneral-exceptions=BaseException,
Exception
overgeneral-exceptions=builtins.BaseException,
builtins.Exception
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,14 @@ Fork the repo at https://github.com/ga4gh/vrs-python/ .
$ cd vrs-python
$ make devready

## Pre-commit

We use [pre-commit](https://pre-commit.com/) to check code style.

Before first commit, run:

$ pre-commit install

## Testing

This package implements typical unit tests for ga4gh.core and ga4gh.vrs. This
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ dev =
tox
vcrpy
yapf
pre-commit
extras =
psycopg2-binary
biocommons.seqrepo>=0.5.1
Expand Down
2 changes: 1 addition & 1 deletion src/ga4gh/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
parse_ga4gh_identifier)
from ._internal.jsonschema import (build_models, build_class_referable_attribute_map, is_pjs_class, is_pjs_instance,
is_curie, is_identifiable, is_literal, pjs_copy)

# pylint: disable=duplicate-code
try:
__version__ = get_distribution(__name__).version
except DistributionNotFound: # pragma: nocover
Expand Down
15 changes: 8 additions & 7 deletions src/ga4gh/core/_internal/identifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,14 @@

# Assume that ga4gh.yaml and vrs.yaml files are in the same directory for now
schema_dir = os.environ.get("VRSATILE_SCHEMA_DIR", pkg_resources.resource_filename(__name__, "data/schemas/vrsatile"))
cfg = yaml.safe_load(open(schema_dir + "/merged.yaml"))
defs = cfg["definitions"]
with open(schema_dir + "/merged.yaml", "r", encoding="utf-8") as f:
cfg = yaml.safe_load(f)

type_prefix_map_default = dict()
for k,v in defs.items():
if "ga4gh_prefix" in v:
type_prefix_map_default[k] = v["ga4gh_prefix"]
defs = cfg["definitions"]
type_prefix_map_default = {}
for key, val in defs.items():
if "ga4gh_prefix" in val:
type_prefix_map_default[key] = val["ga4gh_prefix"]

namespace = "ga4gh"
curie_sep = ":"
Expand Down Expand Up @@ -147,7 +148,7 @@ def ga4gh_serialize(vro):

"""

def dictify(vro, enref=True):
def dictify(vro, enref=True): # pylint: disable=too-many-return-statements
"""recursively converts (any) object to dictionary prior to
serialization

Expand Down
2 changes: 1 addition & 1 deletion src/ga4gh/core/_internal/jsonschema.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

def build_models(path, standardize_names=False):
"""load models from json schema at path"""
with open(path, "r") as yaml_file:
with open(path, "r", encoding="utf-8") as yaml_file:
y = yaml.load(yaml_file, yaml.SafeLoader)
builder = pjs.ObjectBuilder(pjs_filter(y))
models = builder.build_classes(standardize_names=standardize_names)
Expand Down
4 changes: 2 additions & 2 deletions src/ga4gh/vrs/dataproxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def _get_sequence(self, identifier, start=None, end=None):
url = self.base_url + f"sequence/{identifier}"
_logger.info("Fetching %s", url)
params = {"start": start, "end": end}
resp = requests.get(url, params=params)
resp = requests.get(url, params=params, timeout=5)
if resp.status_code == 404:
raise KeyError(identifier)
resp.raise_for_status()
Expand All @@ -161,7 +161,7 @@ def _get_sequence(self, identifier, start=None, end=None):
def _get_metadata(self, identifier):
url = self.base_url + f"metadata/{identifier}"
_logger.info("Fetching %s", url)
resp = requests.get(url)
resp = requests.get(url, timeout=5)
if resp.status_code == 404:
raise KeyError(identifier)
resp.raise_for_status()
Expand Down
54 changes: 30 additions & 24 deletions src/ga4gh/vrs/extras/localizer.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
"""convert named locations into SequenceLocations ("localize") by
reference to external data

"""

import copy

from bioutils.accessions import coerce_namespace
from bioutils.assemblies import make_name_ac_map
from bioutils.cytobands import get_cytoband_maps
Expand Down Expand Up @@ -45,27 +41,25 @@ def __init__(self):
self._ana_maps = {k: make_name_ac_map(k) for k in assy_name_to_map_name}


def localize_allele(self, allele):
def localize_allele(self, allele, assembly_name = "GRCh38"):
"""Converts VRS Allele's named features to sequence location"""
# copy input variant and replace location
# N.B. deepcopy leads to recursion errors
allele_sl = ga4gh.vrs.models.Variation(**allele.as_dict())
del allele_sl.id
allele_sl.location = self.localize(allele.location)
allele_sl.location = self.localize_named_feature(allele.location, assembly_name)
return allele_sl


def localize_named_feature(self, loc, assembly_name):
"""converts named features to sequence locations

"""

"""converts named features to sequence locations"""
assert loc.type._value == "ChromosomeLocation", "Expected a ChromosomeLocation object"

def _get_coords(m, cb):
"""return (start,end) of band `cb` in map `m`"""
if cb is None:
return None
return chr_cb_map[cb][0:2]
return m[cb][0:2]

try:
map_name = assy_name_to_map_name[assembly_name]
Expand All @@ -82,14 +76,14 @@ def _get_coords(m, cb):

coords = []
try:
coords += _get_coords(chr_cb_map, loc.interval.start)
coords += _get_coords(chr_cb_map, loc.start)
except (KeyError, ValueError) as e:
raise ValueError(f"{loc.interval.start}: ChromosomeLocation not in"
raise ValueError(f"{loc.start}: ChromosomeLocation not in"
" map for {assembly_name}, chr {loc.chr}") from e
try:
coords += _get_coords(chr_cb_map, loc.interval.end)
coords += _get_coords(chr_cb_map, loc.end)
except (KeyError, ValueError) as e:
raise ValueError(f"{loc.interval.end}: ChromosomeLocation not in"
raise ValueError(f"{loc.end}: ChromosomeLocation not in"
" map for {assembly_name}, chr {loc.chr}") from e

# the following works regardless of orientation of bands and number of bands
Expand All @@ -98,19 +92,31 @@ def _get_coords(m, cb):
try:
ac = self._ana_maps[assembly_name][loc.chr]
except KeyError as e:
raise ValueError(f"No accssion for {loc.chr} in assembly {assembly_name}") from e
raise ValueError(f"No accession for {loc.chr} in assembly {assembly_name}") from e

return ga4gh.vrs.models.SequenceLocation(
sequence_id=coerce_namespace(ac),
interval=ga4gh.vrs.models.SequenceInterval(
start=ga4gh.vrs.models.Number(value=start),
end=ga4gh.vrs.models.Number(value=end))
)



start=ga4gh.vrs.models.Number(value=start),
end=ga4gh.vrs.models.Number(value=end)
)


if __name__ == "__main__":
cbl = ga4gh.vrs.models.ChromosomeLocation(chr="11", start="q22.3", end="q23.1")
lr = Localizer()
allele_dict = {
"location": {
"chr":"19",
"end": "q13.32",
"start": "q13.32",
"species_id":"taxonomy:9606",
"type":"ChromosomeLocation"
},
"state": {
"sequence": "T",
"type": "LiteralSequenceExpression"
},
"type": "Allele"
}
allele_vo = ga4gh.vrs.models.Allele(**allele_dict)
allele_with_seq_loc = lr.localize_allele(allele_vo)
print("Allele with SequenceLocation:", allele_with_seq_loc.as_dict())
31 changes: 17 additions & 14 deletions src/ga4gh/vrs/extras/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import hgvs.dataproviders.uta

from ga4gh.core import ga4gh_identify
from ga4gh.vrs import models, normalize
from ga4gh.vrs import models, normalize as do_normalize
from ga4gh.vrs.extras.decorators import lazy_property # this should be relocated
from ga4gh.vrs.utils.hgvs_tools import HgvsTools

Expand All @@ -42,7 +42,7 @@ class Translator:
spdi_re = re.compile(r"(?P<ac>[^:]+):(?P<pos>\d+):(?P<del_len_or_seq>\w*):(?P<ins_seq>\w*)")


def __init__(self,
def __init__(self, # pylint: disable=too-many-arguments
data_proxy,
default_assembly_name="GRCh38",
translate_sequence_identifiers=True,
Expand Down Expand Up @@ -72,7 +72,7 @@ def translate_from(self, var, fmt=None):
raise ValueError(f"Unable to parse data as {fmt} variation")
return o

for fmt, t in self.from_translators.items():
for _, t in self.from_translators.items():
o = t(self, var)
if o:
return o
Expand Down Expand Up @@ -192,7 +192,7 @@ def _from_hgvs(self, hgvs_expr):
if not self.hgvs_re.match(hgvs_expr):
return None

sv = self._hgvs_parser.parse_hgvs_variant(hgvs_expr)
sv = self._hgvs_parser.parse_hgvs_variant(hgvs_expr) # pylint: disable=no-member

# prefix accession with namespace
sequence_id = coerce_namespace(sv.ac)
Expand Down Expand Up @@ -296,7 +296,7 @@ def _get_hgvs_tools(self):
self.hgvs_tools = HgvsTools(uta_conn)
return self.hgvs_tools

def _to_hgvs(self, vo, namespace="refseq"):
def _to_hgvs(self, vo, namespace="refseq"): # pylint: disable=too-many-locals
"""generates a *list* of HGVS expressions for VRS Allele.

If `namespace` is not None, returns HGVS strings for the
Expand Down Expand Up @@ -339,7 +339,7 @@ def ir_stype(a):
stype = stypes[0]

# build interval and edit depending on sequence type
if stype == "p":
if stype == "p": # pylint: disable=no-else-raise
raise ValueError("Only nucleic acid variation is currently supported")
# ival = hgvs.location.Interval(start=start, end=end)
# edit = hgvs.edit.AARefAlt(ref=None, alt=vo.state.sequence)
Expand Down Expand Up @@ -375,7 +375,7 @@ def ir_stype(a):
if ns.startswith("GRC") and namespace is None:
continue

if not (any(a.startswith(pfx) for pfx in ("NM", "NP", "NC", "NG"))):
if not any(a.startswith(pfx) for pfx in ("NM", "NP", "NC", "NG")):
continue

var.ac = a
Expand All @@ -389,7 +389,7 @@ def ir_stype(a):

hgvs_exprs += [str(var)]
except hgvs.exceptions.HGVSDataNotAvailableError:
_logger.warning(f"No data found for accession {a}")
_logger.warning("No data found for accession %s", a)

return list(set(hgvs_exprs))

Expand Down Expand Up @@ -425,6 +425,9 @@ def _to_spdi(self, vo, namespace="refseq"):


def is_valid_allele(self, vo):
"""Ensure that `vo` is a valid VRS Allele with SequenceLocation and
LiteralSequenceExpression
"""
return (vo.type == "Allele"
and vo.location.type == "SequenceLocation"
and vo.state.type == "LiteralSequenceExpression")
Expand All @@ -447,7 +450,7 @@ def _post_process_imported_allele(self, allele):
allele.location.sequence_id = seq_id

if self.normalize:
allele = normalize(allele, self.data_proxy)
allele = do_normalize(allele, self.data_proxy)

if self.identify:
allele.id = ga4gh_identify(allele)
Expand Down Expand Up @@ -511,16 +514,16 @@ def _seq_id_mapper(self, ir):
"start": {"value": 21, "type": "Number"}
}
]
formats = ["hgvs", "gnomad", "beacon", "spdi", "vrs", None]
from_formats = ["hgvs", "gnomad", "beacon", "spdi", "vrs", None]

for e in expressions:
print(f"* {e}")
for f in formats:
for f in from_formats:
try:
o = tlr.translate_from(e, f)
r = o.type
vrs_obj = tlr.translate_from(e, f)
r = vrs_obj.type
except ValueError:
r = "-"
except Exception as ex:
except Exception as ex: # pylint: disable=broad-except
r = ex.__class__.__name__
print(f" {f}: {r}")
6 changes: 3 additions & 3 deletions src/ga4gh/vrs/extras/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,19 +35,19 @@ def _format_time(timespan, precision=3):
value = int(leftover / length)
if value > 0:
leftover = leftover % length
time.append(u"%s%s" % (str(value), suffix))
time.append(f"{value}{suffix}")
if leftover < 1:
break
return " ".join(time)

units = [u"s", u"ms", u"us", u"ns"] # the save value
units = ["s", "ms", "us", "ns"] # the save value
scaling = [1, 1e3, 1e6, 1e9]

if timespan > 0.0:
order = min(-int(math.floor(math.log10(timespan)) // 3), 3)
else:
order = 3
return u"%.*g %s" % (precision, timespan * scaling[order], units[order])
return f"{timespan * scaling[order]:.{precision}g} {units[order]}"


def hex_to_base64url(s):
Expand Down
Loading