Skip to content

Commit

Permalink
fix exception handling when one score fails matching
Browse files Browse the repository at this point in the history
  • Loading branch information
nebfield committed Jun 7, 2024
1 parent 08970ed commit 0b59582
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 20 deletions.
2 changes: 1 addition & 1 deletion pgscatalog.match/src/pgscatalog/match/cli/match_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import polars as pl

from ..lib import (
from .. import (
VariantFrame,
ScoringFileFrame,
match_variants,
Expand Down
21 changes: 15 additions & 6 deletions pgscatalog.match/src/pgscatalog/match/lib/matchresult.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ def write_scorefiles(
_ = self.label(**kwargs) # self.df gets updated

if not self._filtered:
# score_df = original scoring file variants
_ = self.filter(score_df=score_df, min_overlap=min_overlap)

# a summary log contains up to one variant (the best match) for each variant
Expand All @@ -296,12 +297,20 @@ def write_scorefiles(
self.summary_log = self.summary_log.collect()

# error at the end, to allow logs to be generated
if not all(x[1] for x in self.filter_summary.iter_rows()):
logger.warning(f"{self.filter_summary}")
[
x.unlink() for xs in outfs for x in xs
] # don't provide dodgy scoring files
raise MatchRateError(
for x in self.filter_summary.iter_rows():
try:
if not x[1]:
raise MatchRateError("Fails matching")
except MatchRateError:
# we reported the exception, but it's fine
logger.warning(f"Score {x[0]} matching failed with match rate {x[2]}")
continue
else:
logger.info(f"Score {x[0]} matching passed with match rate {x[2]}")

# unless everything is bad!
if not any(x[1] for x in self.filter_summary.iter_rows()):
raise ZeroMatchesError(
f"All scores fail to meet match threshold {min_overlap}"
)

Expand Down
29 changes: 19 additions & 10 deletions pgscatalog.match/tests/test_match_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from unittest.mock import patch
import pytest

from pgscatalog.core import ZeroMatchesError, MatchRateError
from pgscatalog.core import ZeroMatchesError
from pgscatalog.match.cli.match_cli import run_match


Expand Down Expand Up @@ -124,8 +124,11 @@ def test_only_match(tmp_path_factory, good_scorefile, good_variants):
assert "0.ipc.zst" in os.listdir(outdir)


def test_strict_match(tmp_path_factory, good_scorefile, good_variants):
"""Test matching well with extremely strict overlap to trigger a MatchRateError"""
def test_stringent_match(tmp_path_factory, good_scorefile, good_variants):
"""Test matching well with stringent match rate.
Some scores pass, some fail, but the application exits successfully.
"""
outdir = tmp_path_factory.mktemp("outdir")

args = [
Expand All @@ -140,20 +143,26 @@ def test_strict_match(tmp_path_factory, good_scorefile, good_variants):
"--outdir",
str(outdir),
"--min_overlap",
"1.0",
"0.9",
)
]
flargs = list(itertools.chain(*args))

with pytest.raises(MatchRateError):
with patch("sys.argv", flargs):
run_match()
with patch("sys.argv", flargs):
run_match()

assert (outdir / "test_log.csv.gz").exists()
assert (outdir / "test_summary.csv").exists()
assert not (outdir / "test_ALL_recessive_0.scorefile.gz").exists()
assert not (outdir / "test_ALL_dominant_0.scorefile.gz").exists()
assert not (outdir / "test_ALL_additive_0.scorefile.gz").exists()

with open(outdir / "test_summary.csv") as f:
summary = list(csv.DictReader(f))
# at least one score fails matching
assert any([x["score_pass"] == "false" for x in summary])
# and at least one score passes matching
assert any([x["score_pass"] == "true" for x in summary])

# but scoring files are still written because at least one score passed
assert (outdir / "test_ALL_additive_0.scorefile.gz").exists()


def test_match_fail(tmp_path_factory, bad_scorefile, good_variants):
Expand Down
6 changes: 3 additions & 3 deletions pgscatalog.match/tests/test_merge_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from glob import glob

from pgscatalog.core import MatchRateError
from pgscatalog.core import ZeroMatchesError
from pgscatalog.match.cli.merge_cli import run_merge


Expand Down Expand Up @@ -120,7 +120,7 @@ def test_splitcombine_output(tmp_path_factory, good_scorefile, match_ipc):


def test_strict_merge(tmp_path_factory, good_scorefile, match_ipc):
"""Test merging with extremely strict overlap to trigger a MatchRateError"""
"""Test merging with extremely strict overlap to trigger a ZeroMatchesError"""
outdir = tmp_path_factory.mktemp("outdir")

args = [
Expand All @@ -140,6 +140,6 @@ def test_strict_merge(tmp_path_factory, good_scorefile, match_ipc):
]
flargs = list(itertools.chain(*args))

with pytest.raises(MatchRateError):
with pytest.raises(ZeroMatchesError):
with patch("sys.argv", flargs):
run_merge()

0 comments on commit 0b59582

Please sign in to comment.