Showing 2 changed files with 221 additions and 0 deletions.
@@ -0,0 +1,171 @@
import tempfile
from collections.abc import Iterator
from itertools import product, cycle
from pathlib import Path

import pytest

import wn
from wn import lmf


@pytest.fixture
def clean_db():
    # factory fixture: the returned callable wipes the database, then
    # adds a placeholder lexicon so the schema is (re)initialized
    def clean_db():
        wn.remove("*")
        dummy_lex = lmf.Lexicon(
            id="dummy",
            version="1",
            label="placeholder to initialize the db",
            language="zxx",
            email="",
            license="",
        )
        wn.add_lexical_resource(
            lmf.LexicalResource(lmf_version="1.3", lexicons=[dummy_lex])
        )

    return clean_db


@pytest.fixture(scope="session")
def datadir():
    return Path(__file__).parent.parent / "tests" / "data"


@pytest.fixture(scope="session")
def mini_lmf_1_0(datadir):
    return datadir / "mini-lmf-1.0.xml"


@pytest.fixture(scope="session")
def empty_db_dir():
    with tempfile.TemporaryDirectory("wn_data_empty") as dir:
        yield Path(dir)


@pytest.fixture
def empty_db(monkeypatch, empty_db_dir, clean_db):
    with monkeypatch.context() as m:
        m.setattr(wn.config, "data_directory", empty_db_dir)
        clean_db()
        yield


@pytest.fixture(scope="session")
def mock_lmf():
    # 33,000 synsets in total across the four parts of speech
    synsets: list[lmf.Synset] = [
        *_make_synsets("n", 20000),
        *_make_synsets("v", 10000),
        *_make_synsets("a", 2000),
        *_make_synsets("r", 1000),
    ]
    entries = _make_entries(synsets)
    lexicon = lmf.Lexicon(
        id="mock",
        version="1",
        label="",
        language="zxx",
        email="",
        license="",
        entries=entries,
        synsets=synsets,
    )
    return lmf.LexicalResource(lmf_version="1.3", lexicons=[lexicon])


@pytest.fixture(scope="session")
def mock_db_dir(mock_lmf):
    with tempfile.TemporaryDirectory("wn_data_mock") as dir:
        old_data_dir = wn.config.data_directory
        wn.config.data_directory = dir
        wn.add_lexical_resource(mock_lmf, progress_handler=None)
        wn.config.data_directory = old_data_dir
        yield Path(dir)
        # close any open DB connections before teardown
        for conn in wn._db.pool.values():
            conn.close()


@pytest.fixture
def mock_db(monkeypatch, mock_db_dir):
    with monkeypatch.context() as m:
        m.setattr(wn.config, "data_directory", mock_db_dir)
        yield


def _make_synsets(pos: str, n: int) -> list[lmf.Synset]:
    synsets: list[lmf.Synset] = [
        lmf.Synset(
            id=f"{i}-{pos}",
            ili="",
            partOfSpeech=pos,
            relations=[],
            meta={},
        )
        for i in range(1, n + 1)
    ]
    # add relations for nouns and verbs
    if pos in "nv":
        total = len(synsets)
        tgt_i = 1  # index of next target synset
        n_targets = cycle([2])  # how many targets to relate
        for cur_i in range(total):
            if tgt_i <= cur_i:
                tgt_i = cur_i + 1
            source = synsets[cur_i]
            for cur_k in range(tgt_i, tgt_i + next(n_targets)):
                if cur_k >= total:
                    break
                target = synsets[cur_k]
                source["relations"].append(
                    lmf.Relation(target=target["id"], relType="hyponym", meta={})
                )
                target["relations"].append(
                    lmf.Relation(target=source["id"], relType="hypernym", meta={})
                )
                tgt_i = cur_k + 1

    return synsets


def _words() -> Iterator[str]:
    # endless, deterministic stream of CVCV strings; cycles through
    # 16 * 5 * 16 * 5 = 6,400 unique forms
    consonants = "kgtdpbfvszrlmnhw"
    vowels = "aeiou"
    while True:
        yield from map("".join, product(consonants, vowels, consonants, vowels))


def _make_entries(synsets: list[lmf.Synset]) -> list[lmf.LexicalEntry]:
    words = _words()
    member_count = cycle(range(1, 4))  # 1, 2, or 3 synset members
    entries: dict[str, lmf.LexicalEntry] = {}
    prev_synsets: list[lmf.Synset] = []
    for synset in synsets:
        ssid = synset["id"]
        pos = synset["partOfSpeech"]

        for _ in range(next(member_count)):
            word = next(words)
            senses = [lmf.Sense(id=f"{word}-{ssid}", synset=ssid, meta={})]
            # add some polysemy
            if prev_synsets:
                ssid2 = prev_synsets.pop()["id"]
                senses.append(lmf.Sense(id=f"{word}-{ssid2}", synset=ssid2, meta={}))
            eid = f"{word}-{pos}"
            if eid not in entries:
                entries[eid] = lmf.LexicalEntry(
                    id=eid,
                    lemma=lmf.Lemma(
                        writtenForm=word,
                        partOfSpeech=pos,
                    ),
                    senses=[],
                    meta={},
                )
            entries[eid]["senses"].extend(senses)

        prev_synsets.append(synset)

    return list(entries.values())
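
A quick sanity check on the helpers above (a minimal sketch, not part of the commit; it assumes the helpers are imported from this module):

from itertools import islice

# _words() starts "kaka", "kake", "kaki", "kako", "kaku", ...
print(list(islice(_words(), 5)))

# mock_lmf builds 33,000 synsets: 20,000 n + 10,000 v + 2,000 a + 1,000 r
synsets = (
    _make_synsets("n", 20000) + _make_synsets("v", 10000)
    + _make_synsets("a", 2000) + _make_synsets("r", 1000)
)
print(len(synsets), "synsets;", len(_make_entries(synsets)), "entries")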
@@ -0,0 +1,50 @@
import pytest

import wn
from wn import lmf


@pytest.mark.benchmark(group="lmf.load", warmup=True) | ||
def test_load(mini_lmf_1_0, benchmark): | ||
benchmark(lmf.load, mini_lmf_1_0) | ||
|
||
|
||
@pytest.mark.benchmark(group="wn.add_lexical_resource") | ||
@pytest.mark.usefixtures('empty_db') | ||
def test_add_lexical_resource(mock_lmf, benchmark): | ||
# TODO: when pytest-benchmark's teardown option is released, use | ||
# that here with more rounds | ||
benchmark.pedantic( | ||
wn.add_lexical_resource, | ||
args=(mock_lmf,), | ||
# teardown=clean_db, | ||
iterations=1, | ||
rounds=1, | ||
) | ||
|
||
|
||
@pytest.mark.benchmark(group="wn.add_lexical_resource") | ||
@pytest.mark.usefixtures('empty_db') | ||
def test_add_lexical_resource_no_progress(mock_lmf, benchmark): | ||
# TODO: when pytest-benchmark's teardown option is released, use | ||
# that here with more rounds | ||
benchmark.pedantic( | ||
wn.add_lexical_resource, | ||
args=(mock_lmf,), | ||
kwargs={"progress_handler": None}, | ||
# teardown=clean_db, | ||
iterations=1, | ||
rounds=1, | ||
) | ||
|
||
|
||
@pytest.mark.benchmark(group="primary queries") | ||
@pytest.mark.usefixtures('mock_db') | ||
def test_synsets(mock_db, benchmark): | ||
benchmark(wn.synsets) | ||
|
||
|
||
@pytest.mark.benchmark(group="primary queries") | ||
@pytest.mark.usefixtures('mock_db') | ||
def test_words(mock_db, benchmark): | ||
benchmark(wn.words) |
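
For reference, these tests run under the pytest-benchmark plugin; a typical invocation (the path here is a placeholder for wherever these files are collected) would be something like:

pytest bench/ --benchmark-only --benchmark-group-by=group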