Skip to content

Commit

Permalink
Actually add bench/ directory...
Browse files Browse the repository at this point in the history
  • Loading branch information
goodmami committed Feb 28, 2025
1 parent 7659f9f commit e026055
Show file tree
Hide file tree
Showing 2 changed files with 221 additions and 0 deletions.
171 changes: 171 additions & 0 deletions bench/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
import tempfile
from collections.abc import Iterator
from itertools import product, cycle
from pathlib import Path

import pytest

import wn
from wn import lmf


@pytest.fixture
def clean_db():
    """Provide a callable that wipes the database and re-seeds a dummy lexicon.

    The returned function removes every installed lexicon and then adds a
    minimal placeholder so the database stays initialized for later use.
    """

    def _reset() -> None:
        wn.remove("*")
        placeholder = lmf.Lexicon(
            id="dummy",
            version="1",
            label="placeholder to initialize the db",
            language="zxx",
            email="",
            license="",
        )
        wn.add_lexical_resource(
            lmf.LexicalResource(lmf_version="1.3", lexicons=[placeholder])
        )

    return _reset


@pytest.fixture(scope="session")
def datadir():
    """Path to the shared test-data directory (tests/data in the repo root)."""
    return Path(__file__).parents[1] / "tests" / "data"


@pytest.fixture(scope="session")
def mini_lmf_1_0(datadir):
    """Path to the WN-LMF 1.0 mini lexicon used by the load benchmark."""
    return datadir.joinpath("mini-lmf-1.0.xml")


@pytest.fixture(scope="session")
def empty_db_dir():
    """Yield a session-lived temporary directory for an empty wn database.

    The directory is deleted automatically at the end of the session.
    """
    # renamed from `dir` to avoid shadowing the `dir` builtin
    with tempfile.TemporaryDirectory("wn_data_empty") as tmpdir:
        yield Path(tmpdir)


@pytest.fixture
def empty_db(monkeypatch, empty_db_dir, clean_db):
    """Point wn at the empty data directory and reset the database for a test."""
    with monkeypatch.context() as ctx:
        ctx.setattr(wn.config, "data_directory", empty_db_dir)
        clean_db()
        yield


@pytest.fixture(scope="session")
def mock_lmf():
    """A synthetic WN-LMF resource large enough for meaningful benchmarks.

    Builds 33,000 synsets across four parts of speech, plus generated
    lexical entries covering them.
    """
    pos_counts = [("n", 20000), ("v", 10000), ("a", 2000), ("r", 1000)]
    synsets: list[lmf.Synset] = []
    for pos, count in pos_counts:
        synsets.extend(_make_synsets(pos, count))
    lexicon = lmf.Lexicon(
        id="mock",
        version="1",
        label="",
        language="zxx",
        email="",
        license="",
        entries=_make_entries(synsets),
        synsets=synsets,
    )
    return lmf.LexicalResource(lmf_version="1.3", lexicons=[lexicon])


@pytest.fixture(scope="session")
def mock_db_dir(mock_lmf):
    """Build a session-scoped data directory pre-loaded with the mock lexicon.

    Temporarily redirects ``wn.config.data_directory`` while loading;
    session-scoped fixtures cannot use the function-scoped ``monkeypatch``
    fixture, so the setting is saved and restored by hand.
    """
    # suffix fixed: was "wn_data_empty", copy-pasted from empty_db_dir
    with tempfile.TemporaryDirectory("wn_data_mock") as tmpdir:
        old_data_dir = wn.config.data_directory
        wn.config.data_directory = tmpdir
        try:
            wn.add_lexical_resource(mock_lmf, progress_handler=None)
        finally:
            # restore even if loading fails so the global config never leaks
            wn.config.data_directory = old_data_dir
        yield Path(tmpdir)
        # close any open DB connections before teardown
        for conn in wn._db.pool.values():
            conn.close()


@pytest.fixture
def mock_db(monkeypatch, mock_db_dir):
    """Temporarily point wn.config at the prepared mock database directory."""
    with monkeypatch.context() as ctx:
        ctx.setattr(wn.config, "data_directory", mock_db_dir)
        yield


def _make_synsets(pos: str, n: int) -> list[lmf.Synset]:
    """Create *n* synthetic synsets with ids ``1-{pos}`` .. ``{n}-{pos}``.

    For nouns and verbs, synsets are additionally linked with
    hyponym/hypernym relations: each source synset points at up to the
    next two not-yet-targeted synsets in list order.
    """
    synsets: list[lmf.Synset] = [
        lmf.Synset(
            id=f"{i}-{pos}",
            ili="",
            partOfSpeech=pos,
            relations=[],
            meta={},
        )
        for i in range(1, n+1)
    ]
    # add relations for nouns and verbs
    if pos in "nv":
        total = len(synsets)
        tgt_i = 1  # index of next target synset
        # how many targets to relate per source; renamed from `n`, which
        # shadowed the function parameter above
        fanout = cycle([2])
        for cur_i in range(total):
            if tgt_i <= cur_i:
                # never relate a synset to itself or to an earlier source
                tgt_i = cur_i + 1
            source = synsets[cur_i]
            for cur_k in range(tgt_i, tgt_i + next(fanout)):
                if cur_k >= total:
                    break
                target = synsets[cur_k]
                source["relations"].append(
                    lmf.Relation(target=target["id"], relType="hyponym", meta={})
                )
                target["relations"].append(
                    lmf.Relation(target=source["id"], relType="hypernym", meta={})
                )
                tgt_i = cur_k + 1

    return synsets


def _words() -> Iterator[str]:
consonants = "kgtdpbfvszrlmnhw"
vowels = "aeiou"
while True:
yield from map("".join, product(consonants, vowels, consonants, vowels))


def _make_entries(synsets: list[lmf.Synset]) -> list[lmf.LexicalEntry]:
    """Create lexical entries whose senses cover *synsets*.

    Each synset gets 1, 2, or 3 member words (cycling), and many senses
    also attach to an earlier synset to simulate polysemy.
    """
    word_gen = _words()
    members_per_synset = cycle(range(1, 4))  # 1, 2, or 3 synset members
    entry_index: dict[str, lmf.LexicalEntry] = {}
    pending: list[lmf.Synset] = []  # earlier synsets awaiting an extra sense
    for current in synsets:
        ssid = current["id"]
        pos = current["partOfSpeech"]

        for _ in range(next(members_per_synset)):
            lemma_form = next(word_gen)
            new_senses = [lmf.Sense(id=f"{lemma_form}-{ssid}", synset=ssid, meta={})]
            # add some polysemy
            if pending:
                earlier_id = pending.pop()["id"]
                new_senses.append(
                    lmf.Sense(id=f"{lemma_form}-{earlier_id}", synset=earlier_id, meta={})
                )
            eid = f"{lemma_form}-{pos}"
            if eid not in entry_index:
                entry_index[eid] = lmf.LexicalEntry(
                    id=eid,
                    lemma=lmf.Lemma(
                        writtenForm=lemma_form,
                        partOfSpeech=pos,
                    ),
                    senses=[],
                    meta={},
                )
            entry_index[eid]["senses"].extend(new_senses)

        pending.append(current)

    return list(entry_index.values())
50 changes: 50 additions & 0 deletions bench/test_bench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import wn
from wn import lmf

import pytest


@pytest.mark.benchmark(group="lmf.load", warmup=True)
def test_load(mini_lmf_1_0, benchmark):
    """Benchmark parsing the mini WN-LMF 1.0 document."""
    benchmark(lambda: lmf.load(mini_lmf_1_0))


@pytest.mark.benchmark(group="wn.add_lexical_resource")
@pytest.mark.usefixtures('empty_db')
def test_add_lexical_resource(mock_lmf, benchmark):
    """Benchmark loading the mock resource with the default progress handler."""
    # TODO: when pytest-benchmark's teardown option is released, use
    # that here with more rounds
    run = benchmark.pedantic
    # teardown=clean_db,
    run(wn.add_lexical_resource, args=(mock_lmf,), rounds=1, iterations=1)


@pytest.mark.benchmark(group="wn.add_lexical_resource")
@pytest.mark.usefixtures('empty_db')
def test_add_lexical_resource_no_progress(mock_lmf, benchmark):
    """Benchmark loading the mock resource with progress reporting disabled."""
    # TODO: when pytest-benchmark's teardown option is released, use
    # that here with more rounds
    run = benchmark.pedantic
    # teardown=clean_db,
    run(
        wn.add_lexical_resource,
        args=(mock_lmf,),
        kwargs={"progress_handler": None},
        rounds=1,
        iterations=1,
    )


@pytest.mark.benchmark(group="primary queries")
def test_synsets(mock_db, benchmark):
    """Benchmark an unfiltered wn.synsets() query against the mock database.

    The ``mock_db`` parameter already activates the fixture, so the
    redundant ``usefixtures('mock_db')`` marker is dropped.
    """
    benchmark(wn.synsets)


@pytest.mark.benchmark(group="primary queries")
def test_words(mock_db, benchmark):
    """Benchmark an unfiltered wn.words() query against the mock database.

    The ``mock_db`` parameter already activates the fixture, so the
    redundant ``usefixtures('mock_db')`` marker is dropped.
    """
    benchmark(wn.words)

0 comments on commit e026055

Please sign in to comment.