Skip to content

Commit

Permalink
Merge branch 'main' into issue-452
Browse files Browse the repository at this point in the history
  • Loading branch information
korikuzma committed Oct 28, 2024
2 parents 9cab291 + db54d54 commit 8632858
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 2 deletions.
2 changes: 1 addition & 1 deletion docs/extras/vcf_annotator.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ vrs-annotate vcf input.vcf.gz --vcf_out output.vcf.gz --vrs_pickle_out vrs_objec
```

### Other Options
`--vrs_attribute`
`--vrs_attributes`
>Will include VRS_Start, VRS_End, VRS_State fields in the INFO field.
`--assembly` [TEXT]
Expand Down
46 changes: 46 additions & 0 deletions src/ga4gh/vrs/extras/vcf_annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
$ vrs-annotate vcf input.vcf.gz --vcf_out output.vcf.gz --vrs_pickle_out vrs_objects.pkl
"""
from collections.abc import Callable
import pathlib
import logging
import pickle
Expand Down Expand Up @@ -36,10 +37,55 @@ class SeqRepoProxyType(str, Enum):
@click.group()
def _cli() -> None:
    """Annotate input files with VRS variation objects."""
    # NOTE: the docstring above doubles as the CLI help text, so it is kept verbatim.
    # Configure the root logger to write INFO-and-above records to a file named
    # "vrs-annotate.log" (a relative path).
    log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    logging.basicConfig(
        filename="vrs-annotate.log",
        level=logging.INFO,
        format=log_format,
    )


class _LogLevel(str, Enum):
"""Define legal values for `--log_level` option."""
DEBUG = "debug"
INFO = "info"
WARNING = "warning"
ERROR = "error"
CRITICAL = "critical"


def _log_level_option(func: Callable) -> Callable:
    """Attach a reusable ``--log_level`` option to a click command.

    The chosen value is not forwarded to the decorated command
    (``expose_value=False``); the option's callback only sets the level of
    this module's logger.

    :param func: incoming click command
    :return: result of applying the ``--log_level`` click option to ``func``
    """
    def _set_log_level(ctx: click.Context, param: click.Parameter, value: str) -> None:  # pylint: disable=unused-argument
        # Map each accepted choice onto the matching ``logging`` constant.
        # ``_LogLevel`` members are ``str`` subclasses, so the plain string
        # passed in as ``value`` is expected to match these keys.
        numeric_levels = {
            _LogLevel.DEBUG: logging.DEBUG,
            _LogLevel.INFO: logging.INFO,
            _LogLevel.WARNING: logging.WARNING,
            _LogLevel.ERROR: logging.ERROR,
            _LogLevel.CRITICAL: logging.CRITICAL,
        }
        module_logger = logging.getLogger(__name__)
        module_logger.setLevel(numeric_levels[value])

    choices = [level.value for level in _LogLevel.__members__.values()]
    add_option = click.option(
        "--log_level",
        type=click.Choice(choices),
        default="info",
        help="Set the logging level.",
        callback=_set_log_level,
        expose_value=False,
        is_eager=True,
    )
    return add_option(func)


@_cli.command(name="vcf")
@_log_level_option
@click.argument(
"vcf_in",
nargs=1,
Expand Down
41 changes: 40 additions & 1 deletion src/ga4gh/vrs/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from ga4gh.core.pydantic import get_pydantic_root

from canonicaljson import encode_canonical_json
from pydantic import BaseModel, Field, RootModel, StringConstraints, ConfigDict
from pydantic import BaseModel, Field, RootModel, StringConstraints, ConfigDict, ValidationInfo, field_validator

from ga4gh.core.pydantic import (
getattr_in
Expand Down Expand Up @@ -366,6 +366,25 @@ class Range(RootModel):
min_length=2,
)

@field_validator("root", mode="after")
@classmethod
def validate_range(cls, v: List[Optional[int]]) -> List[Optional[int]]:
    """Validate range values

    :param v: Root value
    :raises ValueError: If ``root`` does not include at least one integer or if
        the first element in ``root`` is greater than the second element in ``root``
    :return: The validated root value, unchanged
    """
    if v.count(None) == 2:
        err_msg = "Must provide at least one integer."
        raise ValueError(err_msg)

    # Ordering can only be checked when both bounds are present.
    if v[0] is not None and v[1] is not None and v[0] > v[1]:
        err_msg = "The first integer must be less than or equal to the second integer."
        raise ValueError(err_msg)

    return v

class Residue(RootModel):
"""A character representing a specific residue (i.e., molecular species) or
Expand Down Expand Up @@ -498,6 +517,26 @@ class SequenceLocation(_Ga4ghIdentifiableObject):
)
sequence: Optional[SequenceString] = Field(None, description="The literal sequence encoded by the `sequenceReference` at these coordinates.")

@field_validator("start", "end", mode="after")
@classmethod
def validate_start_end(cls, v: Optional[Union[Range, int]], info: ValidationInfo) -> Optional[Union[Range, int]]:
    """Validate ``start`` and ``end`` fields

    :param v: ``start`` or ``end`` value
    :param info: Validation info; ``info.field_name`` is used in the error message
    :raises ValueError: If ``start`` or ``end`` has a value less than 0
    :return: The validated ``start`` or ``end`` value, unchanged
    """
    if v is None:
        return v

    # Collect every concrete integer to check: the value itself, or the
    # non-null entries of a Range's root.
    if isinstance(v, int):
        int_values = [v]
    else:
        int_values = [val for val in v.root if val is not None]

    if any(int_val < 0 for int_val in int_values):
        err_msg = f"The minimum value of `{info.field_name}` is 0."
        raise ValueError(err_msg)

    return v

def ga4gh_serialize_as_version(self, as_version: PrevVrsVersion):
"""This method will return a serialized string following the conventions for
SequenceLocation serialization as defined in the VRS version specified by
Expand Down
16 changes: 16 additions & 0 deletions tests/test_vrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,22 @@
cpb_431012 = models.CisPhasedBlock(**cpb_431012_dict)


@pytest.mark.parametrize(
    "vrs_model, expected_err_msg",
    [
        (
            lambda: models.Range(root=[None, None]),
            "Must provide at least one integer.",
        ),
        (
            lambda: models.Range(root=[2, 1]),
            "The first integer must be less than or equal to the second integer.",
        ),
        (
            lambda: models.SequenceLocation(
                sequenceReference=allele_280320.location.sequenceReference,
                start=-1,
            ),
            "The minimum value of `start` is 0.",
        ),
        (
            lambda: models.SequenceLocation(
                sequenceReference=allele_280320.location.sequenceReference,
                end=[-1, 0],
            ),
            "The minimum value of `end` is 0.",
        ),
    ],
)
def test_model_validation_errors(vrs_model, expected_err_msg):
    """Check that building an invalid VRS model raises with the expected message."""
    # Each case is a zero-argument factory so construction happens inside the
    # ``pytest.raises`` block rather than at collection time.
    with pytest.raises(ValueError) as exc_info:
        vrs_model()

    first_error = exc_info.value.errors()[0]
    assert str(first_error["ctx"]["error"]) == expected_err_msg


def test_vr():
assert a.model_dump(exclude_none=True) == allele_dict
assert is_pydantic_instance(a)
Expand Down

0 comments on commit 8632858

Please sign in to comment.