Skip to content

Commit

Permalink
[Elasticsearch] - BM25 retrieval: not all terms must mandatorily match (#125)
Browse files Browse the repository at this point in the history

* make bm25 less restrictive

* leftover

* improve test

* improve test format
  • Loading branch information
anakin87 authored Dec 20, 2023
1 parent c17ddda commit e7d79e7
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ def _bm25_retrieval(
"query": query,
"fuzziness": fuzziness,
"type": "most_fields",
"operator": "AND",
"operator": "OR",
}
}
]
Expand Down
27 changes: 27 additions & 0 deletions integrations/elasticsearch/tests/test_document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,33 @@ def test_bm25_retrieval_with_fuzziness(self, document_store: ElasticsearchDocume
assert "functional" in res[1].content
assert "functional" in res[2].content

def test_bm25_not_all_terms_must_match(self, document_store: ElasticsearchDocumentStore):
    """
    BM25 retrieval should return a document even when only some of the query terms occur in it.

    Three documents are written to the store and then queried with a
    natural-language question through ``_bm25_retrieval``.
    """
    languages_doc = Document(id=1, content="There are over 7,000 languages spoken around the world today.")
    elephants_doc = Document(
        id=2,
        content=(
            "Elephants have been observed to behave in a way that indicates a high level of self-awareness"
            " such as recognizing themselves in mirrors."
        ),
    )
    bioluminescence_doc = Document(
        id=3,
        content=(
            "In certain parts of the world, like the Maldives, Puerto Rico, and San Diego, you can witness"
            " the phenomenon of bioluminescent waves."
        ),
    )
    document_store.write_documents([languages_doc, elephants_doc, bioluminescence_doc])

    hits = document_store._bm25_retrieval("How much self awareness do elephants have?", top_k=3)

    # top_k would allow up to three results; the test expects exactly one hit, document 2.
    assert len(hits) == 1
    assert hits[0].id == 2

def test_embedding_retrieval(self, document_store: ElasticsearchDocumentStore):
docs = [
Document(content="Most similar document", embedding=[1.0, 1.0, 1.0, 1.0]),
Expand Down

0 comments on commit e7d79e7

Please sign in to comment.