-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Enumerate entry/sense/synset ids for validation
Fixes #228
- Loading branch information
Showing
7 changed files
with
127 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<!DOCTYPE LexicalResource SYSTEM "http://globalwordnet.github.io/schemas/WN-LMF-1.0.dtd"> | ||
<LexicalResource xmlns:dc="http://purl.org/dc/elements/1.1/"> | ||
|
||
<!-- duplicate ID in lexical entries --> | ||
|
||
<Lexicon id="test-e101" | ||
label="Testing E101" | ||
language="en" | ||
email="[email protected]" | ||
license="https://creativecommons.org/licenses/by/4.0/" | ||
version="1"> | ||
|
||
<LexicalEntry id="test-e101-foo-n"> | ||
<Lemma partOfSpeech="n" writtenForm="foo" /> | ||
<Sense id="test-e101-foo" synset="test-e101-01-n" /> | ||
</LexicalEntry> | ||
|
||
<LexicalEntry id="test-e101-foo-n"> | ||
<Lemma partOfSpeech="n" writtenForm="foo2" /> | ||
<Sense id="test-e101-foo2" synset="test-e101-01-n" /> | ||
</LexicalEntry> | ||
|
||
<Synset id="test-e101-01-n" ili="i12345" partOfSpeech="n" /> | ||
|
||
</Lexicon> | ||
|
||
</LexicalResource> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<!DOCTYPE LexicalResource SYSTEM "http://globalwordnet.github.io/schemas/WN-LMF-1.0.dtd"> | ||
<LexicalResource xmlns:dc="http://purl.org/dc/elements/1.1/"> | ||
|
||
<!-- duplicate ID in senses --> | ||
|
||
<Lexicon id="test-e101" | ||
label="Testing E101" | ||
language="en" | ||
email="[email protected]" | ||
license="https://creativecommons.org/licenses/by/4.0/" | ||
version="1"> | ||
|
||
<LexicalEntry id="test-e101-foo-n"> | ||
<Lemma partOfSpeech="n" writtenForm="foo" /> | ||
<Sense id="test-e101-foo" synset="test-e101-01-n" /> | ||
<Sense id="test-e101-foo" synset="test-e101-02-n" /> | ||
</LexicalEntry> | ||
|
||
<Synset id="test-e101-01-n" ili="i12345" partOfSpeech="n" /> | ||
<Synset id="test-e101-02-n" ili="i12346" partOfSpeech="n" /> | ||
|
||
</Lexicon> | ||
|
||
</LexicalResource> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<!DOCTYPE LexicalResource SYSTEM "http://globalwordnet.github.io/schemas/WN-LMF-1.0.dtd"> | ||
<LexicalResource xmlns:dc="http://purl.org/dc/elements/1.1/"> | ||
|
||
<!-- duplicate ID in synsets --> | ||
|
||
<Lexicon id="test-e101" | ||
label="Testing E101" | ||
language="en" | ||
email="[email protected]" | ||
license="https://creativecommons.org/licenses/by/4.0/" | ||
version="1"> | ||
|
||
<LexicalEntry id="test-e101-foo-n">
  <Lemma partOfSpeech="n" writtenForm="foo" />
  <!-- The sense ID deliberately differs from the entry ID so that the
       repeated Synset ID is the only duplicate ID in this fixture. -->
  <Sense id="test-e101-foo" synset="test-e101-01-n" />
</LexicalEntry>
|
||
<Synset id="test-e101-01-n" ili="i12345" partOfSpeech="n" /> | ||
<Synset id="test-e101-01-n" ili="i12346" partOfSpeech="n" /> | ||
|
||
</Lexicon> | ||
|
||
</LexicalResource> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<!DOCTYPE LexicalResource SYSTEM "http://globalwordnet.github.io/schemas/WN-LMF-1.0.dtd"> | ||
<LexicalResource xmlns:dc="http://purl.org/dc/elements/1.1/"> | ||
|
||
<!-- duplicate ID in different entity types --> | ||
|
||
<Lexicon id="test-e101" | ||
label="Testing E101" | ||
language="en" | ||
email="[email protected]" | ||
license="https://creativecommons.org/licenses/by/4.0/" | ||
version="1"> | ||
|
||
<LexicalEntry id="test-e101-foo-n"> | ||
<Lemma partOfSpeech="n" writtenForm="foo" /> | ||
<Sense id="test-e101-foo-n" synset="test-e101-01-n" /> | ||
</LexicalEntry> | ||
|
||
<Synset id="test-e101-01-n" ili="i12345" partOfSpeech="n" /> | ||
|
||
</Lexicon> | ||
|
||
</LexicalResource> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
from collections.abc import Sequence | ||
from pathlib import Path | ||
|
||
from wn import lmf | ||
from wn.validate import validate | ||
|
||
|
||
def _assert_invalid(select: str, path: Path) -> None:
    """Assert that validating the lexicon at *path* flags the check *select*.

    The file is loaded with ``lmf.load``, the first lexicon it contains is
    passed to ``validate`` restricted to the single check code *select*, and
    the test fails unless that check reported at least one item.
    """
    resource = lmf.load(path, progress_handler=None)
    first_lexicon = resource["lexicons"][0]
    results = validate(first_lexicon, select=[select], progress_handler=None)
    # Emit the full report so it is available in the test output.
    print(results)
    flagged = results[select]["items"]
    assert len(flagged) > 0
|
||
|
||
def test_E101(datadir):
    """Check that each E101 fixture file is reported as invalid under E101."""
    fixtures = ("E101-0.xml", "E101-1.xml", "E101-2.xml", "E101-3.xml")
    # Fixtures are checked in order; the first failing one stops the test.
    for filename in fixtures:
        _assert_invalid("E101", datadir / filename)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters