Update cosine score translation for nmslib

Nmslib has a different score range than exact search. Now that the approximate threshold setting has been introduced, both exact search and approximate search can be executed as part of the same k-NN search API. Hence, to keep scores consistent, we override the score translation for nmslib in approximate search. Signed-off-by: Vijayan Balasubramanian <[email protected]>
opensearch-project · Dec 26, 2024 · f14147b · f14147b
1 parent c728f02
commit f14147b
Show file tree

Hide file tree

Showing 3 changed files with 68 additions and 2 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -28,6 +28,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 * Allow validation for non knn index only after 2.17.0 [#2315](https://github.com/opensearch-project/k-NN/pull/2315)
 * Release query vector memory after execution [#2346](https://github.com/opensearch-project/k-NN/pull/2346)
 * Fix shard level rescoring disabled setting flag [#2352](https://github.com/opensearch-project/k-NN/pull/2352)
+* Update cosine score translation for nmslib [#2357](https://github.com/opensearch-project/k-NN/pull/2357)
 ### Infrastructure
 * Updated C++ version in JNI from c++11 to c++17 [#2259](https://github.com/opensearch-project/k-NN/pull/2259)
 * Upgrade bytebuddy and objenesis version to match OpenSearch core and, update github ci runner for macos [#2279](https://github.com/opensearch-project/k-NN/pull/2279)

diff --git a/src/main/java/org/opensearch/knn/index/engine/nmslib/Nmslib.java b/src/main/java/org/opensearch/knn/index/engine/nmslib/Nmslib.java
@@ -14,7 +14,6 @@
 import org.opensearch.knn.index.engine.NativeLibrary;
 import org.opensearch.knn.index.engine.ResolvedMethodContext;
 
-import java.util.Collections;
 import java.util.Map;
 import java.util.function.Function;
 
@@ -30,8 +29,16 @@ public class Nmslib extends NativeLibrary {
 
     final static Map<String, KNNMethod> METHODS = ImmutableMap.of(METHOD_HNSW, new NmslibHNSWMethod());
 
-    public final static Nmslib INSTANCE = new Nmslib(METHODS, Collections.emptyMap(), CURRENT_VERSION, EXTENSION);
     private final MethodResolver methodResolver;
+    // Map that overrides OpenSearch score translation by space type of scores returned by nmslib
+    private final static Map<SpaceType, Function<Float, Float>> SCORE_TRANSLATIONS = ImmutableMap.of(
+        SpaceType.COSINESIMIL,
+        // To be consistent with exact search, we will be using same formula used by lucene as mentioned
+        // here
+        // https://github.com/apache/lucene/blob/0494c824e0ac8049b757582f60d085932a890800/lucene/core/src/java/org/apache/lucene/index/VectorSimilarityFunction.java#L73
+        rawScore -> Math.max((2.0F - rawScore) / 2.0F, 0.0F)
+    );
+    public final static Nmslib INSTANCE = new Nmslib(METHODS, SCORE_TRANSLATIONS, CURRENT_VERSION, EXTENSION);
 
     /**
      * Constructor for Nmslib

diff --git a/src/test/java/org/opensearch/knn/index/NmslibIT.java b/src/test/java/org/opensearch/knn/index/NmslibIT.java
@@ -195,6 +195,64 @@ public void testEndToEnd() throws Exception {
         fail("Graphs are not getting evicted");
     }
 
+    public void testEndToEnd_withApproxAndExactSearch_inSameIndex_ForCosineSpaceType() throws Exception {
+        String indexName = "test-index-1";
+        String fieldName = "test-field-1";
+        SpaceType spaceType = SpaceType.COSINESIMIL;
+        Integer dimension = testData.indexData.vectors[0].length;
+
+        // Create an index
+        XContentBuilder builder = XContentFactory.jsonBuilder()
+            .startObject()
+            .startObject("properties")
+            .startObject(fieldName)
+            .field("type", "knn_vector")
+            .field("dimension", dimension)
+            .field(KNNConstants.METHOD_PARAMETER_SPACE_TYPE, spaceType.getValue())
+            .startObject(KNNConstants.KNN_METHOD)
+            .field(KNNConstants.NAME, KNNConstants.METHOD_HNSW)
+            .field(KNNConstants.KNN_ENGINE, KNNEngine.NMSLIB.getName())
+            .endObject()
+            .endObject()
+            .endObject()
+            .endObject();
+
+        Map<String, Object> mappingMap = xContentBuilderToMap(builder);
+        String mapping = builder.toString();
+
+        createKnnIndex(indexName, buildKNNIndexSettings(0), mapping);
+
+        // Index one document
+        addKnnDoc(indexName, randomAlphaOfLength(5), fieldName, Floats.asList(testData.indexData.vectors[0]).toArray());
+
+        // Assert we have the right number of documents in the index
+        refreshAllIndices();
+        assertEquals(1, getDocCount(indexName));
+        // update threshold setting to skip building graph
+        updateIndexSettings(indexName, Settings.builder().put(KNNSettings.INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD, -1));
+        // add duplicate document with different id
+        addKnnDoc(indexName, randomAlphaOfLength(5), fieldName, Floats.asList(testData.indexData.vectors[0]).toArray());
+        assertEquals(2, getDocCount(indexName));
+        final int k = 2;
+        // search index
+        Response response = searchKNNIndex(
+            indexName,
+            KNNQueryBuilder.builder().fieldName(fieldName).vector(testData.queries[0]).k(k).build(),
+            k
+        );
+        String responseBody = EntityUtils.toString(response.getEntity());
+        List<KNNResult> knnResults = parseSearchResponse(responseBody, fieldName);
+        assertEquals(k, knnResults.size());
+
+        List<Float> actualScores = parseSearchResponseScore(responseBody, fieldName);
+
+        // both document should have identical score
+        assertEquals(actualScores.get(0), actualScores.get(1), 0.001);
+
+        // Delete index
+        deleteKNNIndex(indexName);
+    }
+
     @SneakyThrows
     private void validateSearch(
         final String indexName,