diff --git a/CHANGELOG.md b/CHANGELOG.md
index cd2021f..0907b39 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,9 @@
* `Relation` class ([#216])
* `Sense.relation_map()` method ([#216])
* `Synset.relation_map()` method ([#167], [#216])
+* `W305` blank definition on synset validation ([#151])
+* `W306` blank example on synset validation ([#151])
+* `W307` repeated definition on synset validation ([#151])
## Fixed
@@ -687,6 +690,7 @@ abandoned, but this is an entirely new codebase.
[#146]: https://github.com/goodmami/wn/issues/146
[#147]: https://github.com/goodmami/wn/issues/147
[#148]: https://github.com/goodmami/wn/issues/148
+[#151]: https://github.com/goodmami/wn/issues/151
[#152]: https://github.com/goodmami/wn/issues/152
[#154]: https://github.com/goodmami/wn/issues/154
[#155]: https://github.com/goodmami/wn/issues/155
diff --git a/tests/data/W305-0.xml b/tests/data/W305-0.xml
new file mode 100644
index 0000000..3a6b2df
--- /dev/null
+++ b/tests/data/W305-0.xml
@@ -0,0 +1,27 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tests/data/W306-0.xml b/tests/data/W306-0.xml
new file mode 100644
index 0000000..1aae60b
--- /dev/null
+++ b/tests/data/W306-0.xml
@@ -0,0 +1,27 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tests/data/W307-0.xml b/tests/data/W307-0.xml
new file mode 100644
index 0000000..61f39e7
--- /dev/null
+++ b/tests/data/W307-0.xml
@@ -0,0 +1,30 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ foo
+
+
+
+ foo
+
+
+
+
+
diff --git a/tests/validate_test.py b/tests/validate_test.py
index 76935db..8509600 100644
--- a/tests/validate_test.py
+++ b/tests/validate_test.py
@@ -1,18 +1,24 @@
-from pathlib import Path
+import pytest
from wn import lmf
from wn.validate import validate
+tests = [  # (validation code, fixture index) pairs; each selects the data file "{code}-{i}.xml"
+ ("E101", 0),
+ ("E101", 1),
+ ("E101", 2),
+ ("E101", 3),
+ ("W305", 0),
+ ("W306", 0),
+ ("W307", 0),
+]
+test_ids = [f"{code}-{i}" for code, i in tests]  # pytest case IDs such as "E101-0", parallel to `tests`
-def _assert_invalid(select: str, path: Path) -> None:
+
+@pytest.mark.parametrize("code,i", tests, ids=test_ids)
+def test_validate(datadir, code: str, i: int) -> None:
+ path = datadir / f"{code}-{i}.xml"  # fixture file for this (code, i) case
lex = lmf.load(path, progress_handler=None)["lexicons"][0]
- report = validate(lex, select=[select], progress_handler=None)
+ report = validate(lex, select=[code], progress_handler=None)  # select only the code under test
print(report)
- assert len(report[select]["items"]) > 0
-
-
-def test_E101(datadir):
- _assert_invalid("E101", datadir / "E101-0.xml")
- _assert_invalid("E101", datadir / "E101-1.xml")
- _assert_invalid("E101", datadir / "E101-2.xml")
- _assert_invalid("E101", datadir / "E101-3.xml")
+ assert len(report[code]["items"]) > 0  # the fixture must yield at least one reported item for this code
diff --git a/wn/validate.py b/wn/validate.py
index 9e9dd4b..2d867c6 100644
--- a/wn/validate.py
+++ b/wn/validate.py
@@ -16,6 +16,9 @@
W302 ILI is repeated across synsets.
W303 Proposed ILI is missing a definition.
W304 Existing ILI has a spurious definition.
+W305 Synset has a blank definition.
+W306 Synset has a blank example.
+W307 Synset repeats an existing definition.
E401 Relation target is missing or invalid.
W402 Relation type is invalid for the source and target.
W403 Redundant relation between source and target.
@@ -125,6 +128,34 @@ def _spurious_ili_definition(lex: lmf.Lexicon, ids: _Ids) -> _Result:
if ss['ili'] and ss['ili'] != 'in' and ss.get('ili_definition')}
+def _blank_synset_definition(lex: lmf.Lexicon, ids: _Ids) -> _Result:
+    """synset has a blank definition"""
+    # NOTE(review): the docstring wording is kept verbatim; it presumably
+    # doubles as the W305 report message -- confirm before rewording.
+    # `ids` is not used here; it is kept so the signature matches the other
+    # check functions in this module.
+    # A definition counts as blank when its text is empty or whitespace-only.
+    blank_ids = [
+        synset["id"]
+        for synset in _synsets(lex)
+        if any(
+            not definition["text"].strip()
+            for definition in synset.get("definitions", [])
+        )
+    ]
+    # Each flagged synset maps to its own empty detail dict.
+    return {synset_id: {} for synset_id in blank_ids}
+
+
+def _blank_synset_example(lex: lmf.Lexicon, ids: _Ids) -> _Result:
+    """synset has a blank example"""
+    # NOTE(review): the docstring wording is kept verbatim; it presumably
+    # doubles as the W306 report message -- confirm before rewording.
+    # `ids` is not used here; it is kept so the signature matches the other
+    # check functions in this module.
+    # An example counts as blank when its text is empty or whitespace-only.
+    blank_ids = [
+        synset["id"]
+        for synset in _synsets(lex)
+        if any(
+            not example["text"].strip()
+            for example in synset.get("examples", [])
+        )
+    ]
+    # Each flagged synset maps to its own empty detail dict.
+    return {synset_id: {} for synset_id in blank_ids}
+
+
+def _repeated_synset_definition(lex: lmf.Lexicon, ids: _Ids) -> _Result:
+    """synset repeats an existing definition"""
+    # NOTE(review): the docstring wording is kept verbatim; it presumably
+    # doubles as the W307 report message -- confirm before rewording.
+    # First pass: feed every definition text in the lexicon to _multiples()
+    # (presumably it returns the texts seen more than once -- confirm).
+    duplicated = _multiples(
+        definition["text"]
+        for synset in _synsets(lex)
+        for definition in synset.get("definitions", [])
+    )
+    # Second pass: flag each synset that carries at least one such text.
+    # Texts are compared exactly as stored, with no whitespace normalization.
+    flagged_ids = [
+        synset["id"]
+        for synset in _synsets(lex)
+        if any(
+            definition["text"] in duplicated
+            for definition in synset.get("definitions", [])
+        )
+    ]
+    # Each flagged synset maps to its own empty detail dict.
+    return {synset_id: {} for synset_id in flagged_ids}
+
+
def _missing_relation_target(lex: lmf.Lexicon, ids: _Ids) -> _Result:
"""relation target is missing or invalid"""
result = {s['id']: {'type': r['relType'], 'target': r['target']}
@@ -253,6 +284,9 @@ def _get_dc_type(r: lmf.Relation) -> Optional[str]:
'W302': _repeated_ili,
'W303': _missing_ili_definition,
'W304': _spurious_ili_definition,
+ 'W305': _blank_synset_definition,
+ 'W306': _blank_synset_example,
+ 'W307': _repeated_synset_definition,
# 400 - relations
'E401': _missing_relation_target,
'W402': _invalid_relation_type,