Skip to content

Commit

Permalink
Update cosine score translation for nmslib
Browse files Browse the repository at this point in the history
Nmslib has a different score range than exact search. Since the introduction of
the approximate threshold setting, both exact search and approximate search
can be executed as part of the same k-NN search API call. Hence, to keep scores
consistent, we are overriding the score translation for nmslib for approximate search.

Signed-off-by: Vijayan Balasubramanian <[email protected]>
  • Loading branch information
VijayanB committed Dec 26, 2024
1 parent c728f02 commit f14147b
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
* Allow validation for non knn index only after 2.17.0 [#2315](https://github.com/opensearch-project/k-NN/pull/2315)
* Release query vector memory after execution [#2346](https://github.com/opensearch-project/k-NN/pull/2346)
* Fix shard level rescoring disabled setting flag [#2352](https://github.com/opensearch-project/k-NN/pull/2352)
* Update cosine score translation for nmslib [#2357](https://github.com/opensearch-project/k-NN/pull/2357)
### Infrastructure
* Updated C++ version in JNI from c++11 to c++17 [#2259](https://github.com/opensearch-project/k-NN/pull/2259)
* Upgrade bytebuddy and objenesis version to match OpenSearch core and, update github ci runner for macos [#2279](https://github.com/opensearch-project/k-NN/pull/2279)
Expand Down
11 changes: 9 additions & 2 deletions src/main/java/org/opensearch/knn/index/engine/nmslib/Nmslib.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import org.opensearch.knn.index.engine.NativeLibrary;
import org.opensearch.knn.index.engine.ResolvedMethodContext;

import java.util.Collections;
import java.util.Map;
import java.util.function.Function;

Expand All @@ -30,8 +29,16 @@ public class Nmslib extends NativeLibrary {

final static Map<String, KNNMethod> METHODS = ImmutableMap.of(METHOD_HNSW, new NmslibHNSWMethod());

public final static Nmslib INSTANCE = new Nmslib(METHODS, Collections.emptyMap(), CURRENT_VERSION, EXTENSION);
private final MethodResolver methodResolver;
// Per-space-type overrides for translating raw nmslib scores into OpenSearch scores.
// Only cosine similarity is overridden: its translation follows the formula Lucene uses, so that
// approximate (nmslib) results and exact-search results are scored consistently. See
// https://github.com/apache/lucene/blob/0494c824e0ac8049b757582f60d085932a890800/lucene/core/src/java/org/apache/lucene/index/VectorSimilarityFunction.java#L73
private final static Map<SpaceType, Function<Float, Float>> SCORE_TRANSLATIONS = ImmutableMap.of(
    SpaceType.COSINESIMIL,
    nmslibScore -> {
        // Halve the distance of the raw score from 2, then clamp at zero so the result is never negative.
        final float translated = (2.0F - nmslibScore) / 2.0F;
        return Math.max(translated, 0.0F);
    }
);
// Shared Nmslib engine instance, constructed with the nmslib-specific score translation overrides.
// It must stay declared after SCORE_TRANSLATIONS: static initializers run in textual order and this
// constructor call reads that map.
public final static Nmslib INSTANCE = new Nmslib(METHODS, SCORE_TRANSLATIONS, CURRENT_VERSION, EXTENSION);

/**
* Constructor for Nmslib
Expand Down
58 changes: 58 additions & 0 deletions src/test/java/org/opensearch/knn/index/NmslibIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,64 @@ public void testEndToEnd() throws Exception {
fail("Graphs are not getting evicted");
}

/**
 * Checks that an nmslib cosine-similarity index returns the same score for two identical vectors
 * when one document was indexed before, and the other after, the approximate threshold setting was
 * changed to skip graph building.
 *
 * NOTE(review): the intent appears to be that the first document is answered by approximate
 * (graph) search and the second by exact search within the same query; this relies on the two
 * documents ending up in different segments - confirm against the indexing helpers.
 *
 * @throws Exception if any REST call or response parsing fails
 */
public void testEndToEnd_withApproxAndExactSearch_inSameIndex_ForCosineSpaceType() throws Exception {
    final String indexName = "test-index-1";
    final String fieldName = "test-field-1";
    final SpaceType spaceType = SpaceType.COSINESIMIL;
    final int dimension = testData.indexData.vectors[0].length;

    // Create an index with an nmslib HNSW knn_vector field using the cosine space type
    XContentBuilder builder = XContentFactory.jsonBuilder()
        .startObject()
        .startObject("properties")
        .startObject(fieldName)
        .field("type", "knn_vector")
        .field("dimension", dimension)
        .field(KNNConstants.METHOD_PARAMETER_SPACE_TYPE, spaceType.getValue())
        .startObject(KNNConstants.KNN_METHOD)
        .field(KNNConstants.NAME, KNNConstants.METHOD_HNSW)
        .field(KNNConstants.KNN_ENGINE, KNNEngine.NMSLIB.getName())
        .endObject()
        .endObject()
        .endObject()
        .endObject();

    final String mapping = builder.toString();

    createKnnIndex(indexName, buildKNNIndexSettings(0), mapping);

    // Index one document
    addKnnDoc(indexName, randomAlphaOfLength(5), fieldName, Floats.asList(testData.indexData.vectors[0]).toArray());

    // Assert we have the right number of documents in the index
    refreshAllIndices();
    assertEquals(1, getDocCount(indexName));

    // Update the threshold setting to skip building the graph for documents indexed from now on
    updateIndexSettings(indexName, Settings.builder().put(KNNSettings.INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD, -1));

    // Add a duplicate of the first document (same vector) under a different id
    addKnnDoc(indexName, randomAlphaOfLength(5), fieldName, Floats.asList(testData.indexData.vectors[0]).toArray());
    assertEquals(2, getDocCount(indexName));

    final int k = 2;
    // Search the index
    Response response = searchKNNIndex(
        indexName,
        KNNQueryBuilder.builder().fieldName(fieldName).vector(testData.queries[0]).k(k).build(),
        k
    );
    String responseBody = EntityUtils.toString(response.getEntity());
    List<KNNResult> knnResults = parseSearchResponse(responseBody, fieldName);
    assertEquals(k, knnResults.size());

    List<Float> actualScores = parseSearchResponseScore(responseBody, fieldName);

    // Both documents hold the same vector, so they should have identical scores
    assertEquals(actualScores.get(0), actualScores.get(1), 0.001);

    // Delete index
    deleteKNNIndex(indexName);
}

@SneakyThrows
private void validateSearch(
final String indexName,
Expand Down

0 comments on commit f14147b

Please sign in to comment.