-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add validation for synset definitions and examples (#230)
- W305 blank definition
- W306 blank example
- W307 repeated definition

Resolves #151
- Loading branch information
Showing 6 changed files with 139 additions and 11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<!DOCTYPE LexicalResource SYSTEM "http://globalwordnet.github.io/schemas/WN-LMF-1.0.dtd"> | ||
<LexicalResource xmlns:dc="http://purl.org/dc/elements/1.1/"> | ||
|
||
<!-- blank definition in synset --> | ||
|
||
<Lexicon id="test-w305" | ||
label="Testing W305" | ||
language="en" | ||
email="[email protected]" | ||
license="https://creativecommons.org/licenses/by/4.0/" | ||
version="1"> | ||
|
||
<LexicalEntry id="test-w305-foo-n"> | ||
<Lemma partOfSpeech="n" writtenForm="foo" /> | ||
<Sense id="test-w305-foo-n" synset="test-w305-01-n" /> | ||
</LexicalEntry> | ||
|
||
<Synset id="test-w305-01-n" ili="i12345" partOfSpeech="n"> | ||
<Definition> | ||
|
||
</Definition> | ||
</Synset> | ||
|
||
</Lexicon> | ||
|
||
</LexicalResource> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<!DOCTYPE LexicalResource SYSTEM "http://globalwordnet.github.io/schemas/WN-LMF-1.0.dtd"> | ||
<LexicalResource xmlns:dc="http://purl.org/dc/elements/1.1/"> | ||
|
||
<!-- blank example in synset --> | ||
|
||
<Lexicon id="test-w306" | ||
label="Testing W306" | ||
language="en" | ||
email="[email protected]" | ||
license="https://creativecommons.org/licenses/by/4.0/" | ||
version="1"> | ||
|
||
<LexicalEntry id="test-w306-foo-n"> | ||
<Lemma partOfSpeech="n" writtenForm="foo" /> | ||
<Sense id="test-w306-foo-n" synset="test-w306-01-n" /> | ||
</LexicalEntry> | ||
|
||
<Synset id="test-w306-01-n" ili="i12345" partOfSpeech="n"> | ||
<Example> | ||
|
||
</Example> | ||
</Synset> | ||
|
||
</Lexicon> | ||
|
||
</LexicalResource> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<!DOCTYPE LexicalResource SYSTEM "http://globalwordnet.github.io/schemas/WN-LMF-1.0.dtd"> | ||
<LexicalResource xmlns:dc="http://purl.org/dc/elements/1.1/"> | ||
|
||
<!-- repeated definition in synset --> | ||
|
||
<Lexicon id="test-w307" | ||
label="Testing W307" | ||
language="en" | ||
email="[email protected]" | ||
license="https://creativecommons.org/licenses/by/4.0/" | ||
version="1"> | ||
|
||
<LexicalEntry id="test-w307-foo-n"> | ||
<Lemma partOfSpeech="n" writtenForm="foo" /> | ||
<Sense id="test-w307-foo-1-n" synset="test-w307-01-n" /> | ||
<Sense id="test-w307-foo-2-n" synset="test-w307-02-n" /> | ||
</LexicalEntry> | ||
|
||
<Synset id="test-w307-01-n" ili="i12345" partOfSpeech="n"> | ||
<Definition>foo</Definition> | ||
</Synset> | ||
|
||
<Synset id="test-w307-02-n" ili="i12346" partOfSpeech="n"> | ||
<Definition>foo</Definition> | ||
</Synset> | ||
|
||
</Lexicon> | ||
|
||
</LexicalResource> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,24 @@ | ||
from pathlib import Path | ||
import pytest | ||
|
||
from wn import lmf | ||
from wn.validate import validate | ||
|
||
tests = [ | ||
("E101", 0), | ||
("E101", 1), | ||
("E101", 2), | ||
("E101", 3), | ||
("W305", 0), | ||
("W306", 0), | ||
("W307", 0), | ||
] | ||
test_ids = [f"{code}-{i}" for code, i in tests] | ||
|
||
def _assert_invalid(select: str, path: Path) -> None: | ||
|
||
@pytest.mark.parametrize("code,i", tests, ids=test_ids) | ||
def test_validate(datadir, code: str, i: int) -> None: | ||
path = datadir / f"{code}-{i}.xml" | ||
lex = lmf.load(path, progress_handler=None)["lexicons"][0] | ||
report = validate(lex, select=[select], progress_handler=None) | ||
report = validate(lex, select=[code], progress_handler=None) | ||
print(report) | ||
assert len(report[select]["items"]) > 0 | ||
|
||
|
||
def test_E101(datadir): | ||
_assert_invalid("E101", datadir / "E101-0.xml") | ||
_assert_invalid("E101", datadir / "E101-1.xml") | ||
_assert_invalid("E101", datadir / "E101-2.xml") | ||
_assert_invalid("E101", datadir / "E101-3.xml") | ||
assert len(report[code]["items"]) > 0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters