From 97db01770885fe9f29f7f8e815d9ac2a7c2d90cc Mon Sep 17 00:00:00 2001
From: alekszievr <44192193+alekszievr@users.noreply.github.com>
Date: Thu, 20 Feb 2025 15:16:58 +0100
Subject: [PATCH] Test: test corpus builder [cog-1234] (#564)

## Description

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin

## Summary by CodeRabbit

- **Chores**
  - Enhanced the continuous integration workflows with updated dependency management and environment configurations for improved test stability.

- **Tests**
  - Added parameterized unit tests to verify corpus loading and structure, ensuring more robust handling of test data.
---
 .../eval_framework/corpus_builder_test.py     | 34 +++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 cognee/tests/unit/eval_framework/corpus_builder_test.py

diff --git a/cognee/tests/unit/eval_framework/corpus_builder_test.py b/cognee/tests/unit/eval_framework/corpus_builder_test.py
new file mode 100644
index 00000000..cf328a30
--- /dev/null
+++ b/cognee/tests/unit/eval_framework/corpus_builder_test.py
@@ -0,0 +1,34 @@
+import pytest
+from evals.eval_framework.corpus_builder.corpus_builder_executor import CorpusBuilderExecutor
+from cognee.infrastructure.databases.graph import get_graph_engine
+from unittest.mock import AsyncMock, patch
+
+# Benchmark names each test below is parametrized over.
+benchmark_options = ["HotPotQA", "Dummy", "TwoWikiMultiHop"]
+
+
+@pytest.mark.parametrize("benchmark", benchmark_options)
+def test_corpus_builder_load_corpus(benchmark):
+    """Check that load_corpus returns a non-empty corpus and at most `limit` questions."""
+    limit = 2
+    corpus_builder = CorpusBuilderExecutor(benchmark, "Default")
+    raw_corpus, questions = corpus_builder.load_corpus(limit=limit)
+    assert len(raw_corpus) > 0, f"Corpus builder loads empty corpus for {benchmark}"
+    # Compare against `limit` itself so the bound and the message cannot drift apart.
+    assert len(questions) <= limit, (
+        f"Corpus builder loads {len(questions)} for {benchmark} when limit is {limit}"
+    )
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("benchmark", benchmark_options)
+@patch.object(CorpusBuilderExecutor, "run_cognee", new_callable=AsyncMock)
+async def test_corpus_builder_build_corpus(mock_run_cognee, benchmark):
+    """Check that build_corpus returns at most `limit` questions with run_cognee mocked out."""
+    limit = 2
+    corpus_builder = CorpusBuilderExecutor(benchmark, "Default")
+    # run_cognee is replaced by an AsyncMock for the duration of this test.
+    questions = await corpus_builder.build_corpus(limit=limit)
+    assert len(questions) <= limit, (
+        f"Corpus builder loads {len(questions)} for {benchmark} when limit is {limit}"
+    )