Showing 2 changed files with 221 additions and 0 deletions.
@@ -0,0 +1,171 @@
import tempfile
from collections.abc import Iterator
from itertools import product, cycle
from pathlib import Path

import pytest

import wn
from wn import lmf


@pytest.fixture
def clean_db():
    # factory fixture: the returned callable wipes the database, then
    # adds a placeholder lexicon so the schema is (re)initialized
    def clean_db():
        wn.remove("*")
        dummy_lex = lmf.Lexicon(
            id="dummy",
            version="1",
            label="placeholder to initialize the db",
            language="zxx",
            email="",
            license="",
        )
        wn.add_lexical_resource(
            lmf.LexicalResource(lmf_version="1.3", lexicons=[dummy_lex])
        )

    return clean_db


@pytest.fixture(scope="session")
def datadir():
    return Path(__file__).parent.parent / "tests" / "data"


@pytest.fixture(scope="session")
def mini_lmf_1_0(datadir):
    return datadir / "mini-lmf-1.0.xml"


@pytest.fixture(scope="session")
def empty_db_dir():
    with tempfile.TemporaryDirectory("wn_data_empty") as dir:
        yield Path(dir)


@pytest.fixture
def empty_db(monkeypatch, empty_db_dir, clean_db):
    with monkeypatch.context() as m:
        m.setattr(wn.config, "data_directory", empty_db_dir)
        clean_db()
        yield


@pytest.fixture(scope="session")
def mock_lmf():
    # 33,000 synsets in total across the four parts of speech
    synsets: list[lmf.Synset] = [
        *_make_synsets("n", 20000),
        *_make_synsets("v", 10000),
        *_make_synsets("a", 2000),
        *_make_synsets("r", 1000),
    ]
    entries = _make_entries(synsets)
    lexicon = lmf.Lexicon(
        id="mock",
        version="1",
        label="",
        language="zxx",
        email="",
        license="",
        entries=entries,
        synsets=synsets,
    )
    return lmf.LexicalResource(lmf_version="1.3", lexicons=[lexicon])


@pytest.fixture(scope="session")
def mock_db_dir(mock_lmf):
    with tempfile.TemporaryDirectory("wn_data_mock") as dir:
        old_data_dir = wn.config.data_directory
        wn.config.data_directory = dir
        wn.add_lexical_resource(mock_lmf, progress_handler=None)
        wn.config.data_directory = old_data_dir
        yield Path(dir)
        # close any open DB connections before teardown
        for conn in wn._db.pool.values():
            conn.close()


@pytest.fixture
def mock_db(monkeypatch, mock_db_dir):
    with monkeypatch.context() as m:
        m.setattr(wn.config, "data_directory", mock_db_dir)
        yield


def _make_synsets(pos: str, n: int) -> list[lmf.Synset]:
    synsets: list[lmf.Synset] = [
        lmf.Synset(
            id=f"{i}-{pos}",
            ili="",
            partOfSpeech=pos,
            relations=[],
            meta={},
        )
        for i in range(1, n + 1)
    ]
    # add relations for nouns and verbs
    if pos in "nv":
        total = len(synsets)
        tgt_i = 1  # index of next target synset
        n_targets = cycle([2])  # how many targets to relate
        for cur_i in range(total):
            if tgt_i <= cur_i:
                tgt_i = cur_i + 1
            source = synsets[cur_i]
            for cur_k in range(tgt_i, tgt_i + next(n_targets)):
                if cur_k >= total:
                    break
                target = synsets[cur_k]
                source["relations"].append(
                    lmf.Relation(target=target["id"], relType="hyponym", meta={})
                )
                target["relations"].append(
                    lmf.Relation(target=source["id"], relType="hypernym", meta={})
                )
                tgt_i = cur_k + 1

    return synsets


def _words() -> Iterator[str]:
    # endless, deterministic stream of CVCV strings; cycles through
    # 16 * 5 * 16 * 5 = 6,400 unique forms
    consonants = "kgtdpbfvszrlmnhw"
    vowels = "aeiou"
    while True:
        yield from map("".join, product(consonants, vowels, consonants, vowels))


def _make_entries(synsets: list[lmf.Synset]) -> list[lmf.LexicalEntry]:
    words = _words()
    member_count = cycle(range(1, 4))  # 1, 2, or 3 synset members
    entries: dict[str, lmf.LexicalEntry] = {}
    prev_synsets: list[lmf.Synset] = []
    for synset in synsets:
        ssid = synset["id"]
        pos = synset["partOfSpeech"]

        for _ in range(next(member_count)):
            word = next(words)
            senses = [lmf.Sense(id=f"{word}-{ssid}", synset=ssid, meta={})]
            # add some polysemy
            if prev_synsets:
                ssid2 = prev_synsets.pop()["id"]
                senses.append(lmf.Sense(id=f"{word}-{ssid2}", synset=ssid2, meta={}))
            eid = f"{word}-{pos}"
            if eid not in entries:
                entries[eid] = lmf.LexicalEntry(
                    id=eid,
                    lemma=lmf.Lemma(
                        writtenForm=word,
                        partOfSpeech=pos,
                    ),
                    senses=[],
                    meta={},
                )
            entries[eid]["senses"].extend(senses)

        prev_synsets.append(synset)

    return list(entries.values())
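
A quick sanity check on the helpers above (a minimal sketch, not part of the commit; it assumes the helpers are imported from this module):

from itertools import islice

# _words() starts "kaka", "kake", "kaki", "kako", "kaku", ...
print(list(islice(_words(), 5)))

# mock_lmf builds 33,000 synsets: 20,000 n + 10,000 v + 2,000 a + 1,000 r
synsets = (
    _make_synsets("n", 20000) + _make_synsets("v", 10000)
    + _make_synsets("a", 2000) + _make_synsets("r", 1000)
)
print(len(synsets), "synsets;", len(_make_entries(synsets)), "entries")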
@@ -0,0 +1,50 @@
import pytest

import wn
from wn import lmf


@pytest.mark.benchmark(group="lmf.load", warmup=True) | ||
def test_load(mini_lmf_1_0, benchmark): | ||
benchmark(lmf.load, mini_lmf_1_0) | ||
|
||
|
||
@pytest.mark.benchmark(group="wn.add_lexical_resource") | ||
@pytest.mark.usefixtures('empty_db') | ||
def test_add_lexical_resource(mock_lmf, benchmark): | ||
# TODO: when pytest-benchmark's teardown option is released, use | ||
# that here with more rounds | ||
benchmark.pedantic( | ||
wn.add_lexical_resource, | ||
args=(mock_lmf,), | ||
# teardown=clean_db, | ||
iterations=1, | ||
rounds=1, | ||
) | ||
|
||
|
||
@pytest.mark.benchmark(group="wn.add_lexical_resource") | ||
@pytest.mark.usefixtures('empty_db') | ||
def test_add_lexical_resource_no_progress(mock_lmf, benchmark): | ||
# TODO: when pytest-benchmark's teardown option is released, use | ||
# that here with more rounds | ||
benchmark.pedantic( | ||
wn.add_lexical_resource, | ||
args=(mock_lmf,), | ||
kwargs={"progress_handler": None}, | ||
# teardown=clean_db, | ||
iterations=1, | ||
rounds=1, | ||
) | ||
|
||
|
||
@pytest.mark.benchmark(group="primary queries") | ||
@pytest.mark.usefixtures('mock_db') | ||
def test_synsets(mock_db, benchmark): | ||
benchmark(wn.synsets) | ||
|
||
|
||
@pytest.mark.benchmark(group="primary queries") | ||
@pytest.mark.usefixtures('mock_db') | ||
def test_words(mock_db, benchmark): | ||
benchmark(wn.words) |
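
For reference, these tests run under the pytest-benchmark plugin; a typical invocation (the path here is a placeholder for wherever these files are collected) would be something like:

pytest bench/ --benchmark-only --benchmark-group-by=group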