# Reconstructed from patch e026055564b40628a9a53ca502ff56c1be2cbde0
# ("Actually add bench/ directory...", Michael Wayne Goodman, 2025-02-28),
# which adds the two files reproduced below.

# ============================ bench/conftest.py =============================
import tempfile
from collections.abc import Iterator
from itertools import cycle, product
from pathlib import Path

import pytest

import wn
from wn import lmf


@pytest.fixture
def clean_db():
    """Return a callable that wipes the wn database and re-seeds it.

    A dummy lexicon is added after the wipe so the database schema is
    (re)initialized and subsequent benchmark rounds start from the same
    near-empty state.
    """

    def clean_db():
        wn.remove("*")
        dummy_lex = lmf.Lexicon(
            id="dummy",
            version="1",
            label="placeholder to initialize the db",
            language="zxx",
            email="",
            license="",
        )
        wn.add_lexical_resource(
            lmf.LexicalResource(lmf_version="1.3", lexicons=[dummy_lex])
        )

    return clean_db


@pytest.fixture(scope="session")
def datadir():
    # bench/ sits next to tests/, so reuse the test suite's data files
    return Path(__file__).parent.parent / "tests" / "data"


@pytest.fixture(scope="session")
def mini_lmf_1_0(datadir):
    """Path to a small WN-LMF 1.0 document used for parse benchmarks."""
    return datadir / "mini-lmf-1.0.xml"


@pytest.fixture(scope="session")
def empty_db_dir():
    # NOTE: the positional argument to TemporaryDirectory is a *suffix*
    with tempfile.TemporaryDirectory("wn_data_empty") as tmpdir:
        yield Path(tmpdir)


@pytest.fixture
def empty_db(monkeypatch, empty_db_dir, clean_db):
    """Point wn at an empty data directory with a freshly initialized db."""
    with monkeypatch.context() as m:
        m.setattr(wn.config, "data_directory", empty_db_dir)
        clean_db()
        yield


@pytest.fixture(scope="session")
def mock_lmf():
    """Build a synthetic lexical resource large enough to benchmark against."""
    synsets: list[lmf.Synset] = [
        *_make_synsets("n", 20000),
        *_make_synsets("v", 10000),
        *_make_synsets("a", 2000),
        *_make_synsets("r", 1000),
    ]
    entries = _make_entries(synsets)
    lexicon = lmf.Lexicon(
        id="mock",
        version="1",
        label="",
        language="zxx",
        email="",
        license="",
        entries=entries,
        synsets=synsets,
    )
    return lmf.LexicalResource(lmf_version="1.3", lexicons=[lexicon])


@pytest.fixture(scope="session")
def mock_db_dir(mock_lmf):
    """Session-scoped data directory pre-loaded with the mock resource."""
    # suffix was "wn_data_empty" (copy-paste from empty_db_dir); use a
    # distinct suffix so leftover temp dirs are identifiable
    with tempfile.TemporaryDirectory("wn_data_mock") as tmpdir:
        old_data_dir = wn.config.data_directory
        wn.config.data_directory = tmpdir
        try:
            wn.add_lexical_resource(mock_lmf, progress_handler=None)
        finally:
            # always restore, even if loading fails, so other fixtures
            # don't silently run against this temporary directory
            wn.config.data_directory = old_data_dir
        yield Path(tmpdir)
        # close any open DB connections before teardown
        for conn in wn._db.pool.values():
            conn.close()


@pytest.fixture
def mock_db(monkeypatch, mock_db_dir):
    """Point wn at the pre-built mock database for query benchmarks."""
    with monkeypatch.context() as m:
        m.setattr(wn.config, "data_directory", mock_db_dir)
        yield


def _make_synsets(pos: str, n: int) -> list[lmf.Synset]:
    """Create *n* synsets for part-of-speech *pos*.

    Nouns and verbs additionally get hyponym/hypernym relations forming a
    shallow tree (each synset points at the next couple of synsets).
    """
    synsets: list[lmf.Synset] = [
        lmf.Synset(
            id=f"{i}-{pos}",
            ili="",
            partOfSpeech=pos,
            relations=[],
            meta={},
        )
        for i in range(1, n + 1)
    ]
    # add relations for nouns and verbs
    if pos in "nv":
        total = len(synsets)
        tgt_i = 1  # index of next target synset
        # how many targets to relate; renamed from `n`, which shadowed
        # the synset-count parameter above
        fanout = cycle([2])
        for cur_i in range(total):
            if tgt_i <= cur_i:
                tgt_i = cur_i + 1
            source = synsets[cur_i]
            for cur_k in range(tgt_i, tgt_i + next(fanout)):
                if cur_k >= total:
                    break
                target = synsets[cur_k]
                source["relations"].append(
                    lmf.Relation(target=target["id"], relType="hyponym", meta={})
                )
                target["relations"].append(
                    lmf.Relation(target=source["id"], relType="hypernym", meta={})
                )
                tgt_i = cur_k + 1

    return synsets


def _words() -> Iterator[str]:
    """Yield an endless, repeating stream of pronounceable CVCV pseudo-words."""
    consonants = "kgtdpbfvszrlmnhw"
    vowels = "aeiou"
    while True:
        yield from map("".join, product(consonants, vowels, consonants, vowels))


def _make_entries(synsets: list[lmf.Synset]) -> list[lmf.LexicalEntry]:
    """Create lexical entries covering *synsets*.

    Each synset gets one to three member words (cycling 1, 2, 3), and each
    new word may also pick up a sense for a previously seen synset,
    introducing some polysemy into the mock data.
    """
    words = _words()
    member_count = cycle(range(1, 4))  # 1, 2, or 3 synset members
    entries: dict[str, lmf.LexicalEntry] = {}
    prev_synsets: list[lmf.Synset] = []
    for synset in synsets:
        ssid = synset["id"]
        pos = synset["partOfSpeech"]

        for _ in range(next(member_count)):
            word = next(words)
            senses = [lmf.Sense(id=f"{word}-{ssid}", synset=ssid, meta={})]
            # add some polysemy
            if prev_synsets:
                ssid2 = prev_synsets.pop()["id"]
                senses.append(lmf.Sense(id=f"{word}-{ssid2}", synset=ssid2, meta={}))
            eid = f"{word}-{pos}"
            if eid not in entries:
                entries[eid] = lmf.LexicalEntry(
                    id=eid,
                    lemma=lmf.Lemma(
                        writtenForm=word,
                        partOfSpeech=pos,
                    ),
                    senses=[],
                    meta={},
                )
            entries[eid]["senses"].extend(senses)

        prev_synsets.append(synset)

    return list(entries.values())


# =========================== bench/test_bench.py ============================
import pytest

import wn
from wn import lmf


@pytest.mark.benchmark(group="lmf.load", warmup=True)
def test_load(mini_lmf_1_0, benchmark):
    """Benchmark parsing a small WN-LMF 1.0 document."""
    benchmark(lmf.load, mini_lmf_1_0)


@pytest.mark.benchmark(group="wn.add_lexical_resource")
@pytest.mark.usefixtures("empty_db")
def test_add_lexical_resource(mock_lmf, benchmark):
    # TODO: when pytest-benchmark's teardown option is released, use
    # that here with more rounds
    benchmark.pedantic(
        wn.add_lexical_resource,
        args=(mock_lmf,),
        # teardown=clean_db,
        iterations=1,
        rounds=1,
    )


@pytest.mark.benchmark(group="wn.add_lexical_resource")
@pytest.mark.usefixtures("empty_db")
def test_add_lexical_resource_no_progress(mock_lmf, benchmark):
    # TODO: when pytest-benchmark's teardown option is released, use
    # that here with more rounds
    benchmark.pedantic(
        wn.add_lexical_resource,
        args=(mock_lmf,),
        kwargs={"progress_handler": None},
        # teardown=clean_db,
        iterations=1,
        rounds=1,
    )


@pytest.mark.benchmark(group="primary queries")
@pytest.mark.usefixtures("mock_db")
def test_synsets(benchmark):
    # mock_db is requested via usefixtures only; the duplicate parameter
    # in the original was redundant
    benchmark(wn.synsets)


@pytest.mark.benchmark(group="primary queries")
@pytest.mark.usefixtures("mock_db")
def test_words(benchmark):
    benchmark(wn.words)