diff --git a/CHANGELOG.md b/CHANGELOG.md index cd2021f..0907b39 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,9 @@ * `Relation` class ([#216]) * `Sense.relation_map()` method ([#216]) * `Synset.relation_map()` method ([#167], [#216]) +* `W305` blank definition on synset validation ([#151]) +* `W306` blank example on synset validation ([#151]) +* `W307` repeated definition on synset validation ([#151]) ## Fixed @@ -687,6 +690,7 @@ abandoned, but this is an entirely new codebase. [#146]: https://github.com/goodmami/wn/issues/146 [#147]: https://github.com/goodmami/wn/issues/147 [#148]: https://github.com/goodmami/wn/issues/148 +[#151]: https://github.com/goodmami/wn/issues/151 [#152]: https://github.com/goodmami/wn/issues/152 [#154]: https://github.com/goodmami/wn/issues/154 [#155]: https://github.com/goodmami/wn/issues/155 diff --git a/tests/data/W305-0.xml b/tests/data/W305-0.xml new file mode 100644 index 0000000..3a6b2df --- /dev/null +++ b/tests/data/W305-0.xml @@ -0,0 +1,27 @@ + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/data/W306-0.xml b/tests/data/W306-0.xml new file mode 100644 index 0000000..1aae60b --- /dev/null +++ b/tests/data/W306-0.xml @@ -0,0 +1,27 @@ + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/data/W307-0.xml b/tests/data/W307-0.xml new file mode 100644 index 0000000..61f39e7 --- /dev/null +++ b/tests/data/W307-0.xml @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + foo + + + + foo + + + + + diff --git a/tests/validate_test.py b/tests/validate_test.py index 76935db..8509600 100644 --- a/tests/validate_test.py +++ b/tests/validate_test.py @@ -1,18 +1,24 @@ -from pathlib import Path +import pytest from wn import lmf from wn.validate import validate +tests = [ + ("E101", 0), + ("E101", 1), + ("E101", 2), + ("E101", 3), + ("W305", 0), + ("W306", 0), + ("W307", 0), +] +test_ids = [f"{code}-{i}" for code, i in tests] -def _assert_invalid(select: str, path: Path) -> None: + +@pytest.mark.parametrize("code,i", 
tests, ids=test_ids) +def test_validate(datadir, code: str, i: int) -> None: + path = datadir / f"{code}-{i}.xml" lex = lmf.load(path, progress_handler=None)["lexicons"][0] - report = validate(lex, select=[select], progress_handler=None) + report = validate(lex, select=[code], progress_handler=None) print(report) - assert len(report[select]["items"]) > 0 - - -def test_E101(datadir): - _assert_invalid("E101", datadir / "E101-0.xml") - _assert_invalid("E101", datadir / "E101-1.xml") - _assert_invalid("E101", datadir / "E101-2.xml") - _assert_invalid("E101", datadir / "E101-3.xml") + assert len(report[code]["items"]) > 0 diff --git a/wn/validate.py b/wn/validate.py index 9e9dd4b..2d867c6 100644 --- a/wn/validate.py +++ b/wn/validate.py @@ -16,6 +16,9 @@ W302 ILI is repeated across synsets. W303 Proposed ILI is missing a definition. W304 Existing ILI has a spurious definition. +W305 Synset has a blank definition. +W306 Synset has a blank example. +W307 Synset repeats an existing definition. E401 Relation target is missing or invalid. W402 Relation type is invalid for the source and target. W403 Redundant relation between source and target. 
@@ -125,6 +128,40 @@ def _spurious_ili_definition(lex: lmf.Lexicon, ids: _Ids) -> _Result:
             if ss['ili'] and ss['ili'] != 'in' and ss.get('ili_definition')}
 
 
+def _blank_synset_definition(lex: lmf.Lexicon, ids: _Ids) -> _Result:
+    """synset has a blank definition"""
+    # A definition is blank when its text is empty or whitespace-only.
+    return {
+        ss['id']: {} for ss in _synsets(lex)
+        if any(dfn["text"].strip() == "" for dfn in ss.get("definitions", []))
+    }
+
+
+def _blank_synset_example(lex: lmf.Lexicon, ids: _Ids) -> _Result:
+    """synset has a blank example"""
+    # An example is blank when its text is empty or whitespace-only.
+    return {
+        ss['id']: {} for ss in _synsets(lex)
+        if any(ex["text"].strip() == "" for ex in ss.get("examples", []))
+    }
+
+
+def _repeated_synset_definition(lex: lmf.Lexicon, ids: _Ids) -> _Result:
+    """synset repeats an existing definition"""
+    # First pass: feed every definition text in the lexicon to _multiples()
+    # (presumably yields the texts seen more than once -- confirm in helper).
+    repeated = _multiples(
+        dfn["text"]
+        for ss in _synsets(lex)
+        for dfn in ss.get("definitions", [])
+    )
+    # Second pass: flag each synset whose definition text is in that result;
+    # texts are compared exactly as written, without normalization.
+    return {
+        ss["id"]: {} for ss in _synsets(lex)
+        if any(dfn["text"] in repeated for dfn in ss.get("definitions", []))
+    }
+
+
 def _missing_relation_target(lex: lmf.Lexicon, ids: _Ids) -> _Result:
     """relation target is missing or invalid"""
     result = {s['id']: {'type': r['relType'], 'target': r['target']}
@@ -253,6 +290,9 @@ def _get_dc_type(r: lmf.Relation) -> Optional[str]:
     'W302': _repeated_ili,
     'W303': _missing_ili_definition,
     'W304': _spurious_ili_definition,
+    'W305': _blank_synset_definition,
+    'W306': _blank_synset_example,
+    'W307': _repeated_synset_definition,
     # 400 - relations
     'E401': _missing_relation_target,
     'W402': _invalid_relation_type,