From 1d6c077156cf2e57f492b0456c6caf672c3cf7b8 Mon Sep 17 00:00:00 2001
From: Navneet Verma <navneev@amazon.com>
Date: Fri, 12 Apr 2024 09:00:50 -0700
Subject: [PATCH] Remove unnecessary toString conversion of vector field and
 add some minor optimizations in KNNCodec

Signed-off-by: Navneet Verma <navneev@amazon.com>
---
 CHANGELOG.md                                           |  1 +
 .../opensearch/knn/index/codec/util/KNNCodecUtil.java  |  5 +++++
 .../knn/index/mapper/KNNVectorFieldMapper.java         |  4 ++--
 .../knn/index/mapper/KNNVectorFieldMapperUtil.java     | 10 +++-------
 .../opensearch/knn/index/mapper/LuceneFieldMapper.java |  4 ++--
 5 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e0812ab41..91a37642c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 * Make the HitQueue size more appropriate for exact search [#1549](https://github.com/opensearch-project/k-NN/pull/1549)
 * Support script score when doc value is disabled [#1573](https://github.com/opensearch-project/k-NN/pull/1573)
 * Implemented the Streaming Feature to stream vectors from Java to JNI layer to enable creation of larger segments for vector indices [#1604](https://github.com/opensearch-project/k-NN/pull/1604)
+* Remove unnecessary toString conversion of vector field and add some minor optimizations in KNNCodec [#1613](https://github.com/opensearch-project/k-NN/pull/1613)
 ### Bug Fixes
 ### Infrastructure
 * Add micro-benchmark module in k-NN plugin for benchmark streaming vectors to JNI layer functionality. [#1583](https://github.com/opensearch-project/k-NN/pull/1583)
diff --git a/src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java b/src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java
index c5ae469e0..e05962608 100644
--- a/src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java
+++ b/src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java
@@ -64,6 +64,11 @@ public static KNNCodecUtil.Pair getFloats(BinaryDocValues values) throws IOExcep
 
                 if (vectorsPerTransfer == Integer.MIN_VALUE) {
                     vectorsPerTransfer = (dimension * Float.BYTES * totalLiveDocs) / vectorsStreamingMemoryLimit;
+                    // The integer division above yields 0 when vectorsStreamingMemoryLimit is larger than the total
+                    // number of bytes to transfer; sending all vectors in one batch then saves 1 extra trip to the JNI layer.
+                    if (vectorsPerTransfer == 0) {
+                        vectorsPerTransfer = totalLiveDocs;
+                    }
                 }
                 if (vectorList.size() == vectorsPerTransfer) {
                     vectorAddress = JNICommons.storeVectorData(
diff --git a/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapper.java b/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapper.java
index a36a4222b..0fa026f34 100644
--- a/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapper.java
+++ b/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapper.java
@@ -561,7 +561,7 @@ protected void parseCreateField(ParseContext context, int dimension, SpaceType s
             VectorField point = new VectorField(name(), array, fieldType);
 
             context.doc().add(point);
-            addStoredFieldForVectorField(context, fieldType, name(), point.toString());
+            addStoredFieldForVectorField(context, fieldType, name(), point);
         } else if (VectorDataType.FLOAT == vectorDataType) {
             Optional<float[]> floatsArrayOptional = getFloatsFromContext(context, dimension, methodComponentContext);
 
@@ -572,7 +572,7 @@ protected void parseCreateField(ParseContext context, int dimension, SpaceType s
             spaceType.validateVector(array);
             VectorField point = new VectorField(name(), array, fieldType);
             context.doc().add(point);
-            addStoredFieldForVectorField(context, fieldType, name(), point.toString());
+            addStoredFieldForVectorField(context, fieldType, name(), point);
         } else {
             throw new IllegalArgumentException(
                 String.format(Locale.ROOT, "Cannot parse context for unsupported values provided for field [%s]", VECTOR_DATA_TYPE_FIELD)
diff --git a/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapperUtil.java b/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapperUtil.java
index 283d35f00..074be0375 100644
--- a/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapperUtil.java
+++ b/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapperUtil.java
@@ -13,6 +13,7 @@
 
 import lombok.AccessLevel;
 import lombok.NoArgsConstructor;
+import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.StoredField;
 import org.apache.lucene.index.DocValuesType;
@@ -135,14 +136,9 @@ public static FieldType buildDocValuesFieldType(KNNEngine knnEngine) {
         return field;
     }
 
-    public static void addStoredFieldForVectorField(
-        ParseContext context,
-        FieldType fieldType,
-        String mapperName,
-        String vectorFieldAsString
-    ) {
+    public static void addStoredFieldForVectorField(ParseContext context, FieldType fieldType, String mapperName, Field vectorField) {
         if (fieldType.stored()) {
-            context.doc().add(new StoredField(mapperName, vectorFieldAsString));
+            context.doc().add(new StoredField(mapperName, vectorField.toString()));
         }
     }
 }
diff --git a/src/main/java/org/opensearch/knn/index/mapper/LuceneFieldMapper.java b/src/main/java/org/opensearch/knn/index/mapper/LuceneFieldMapper.java
index 185ab3dc4..d61fa1150 100644
--- a/src/main/java/org/opensearch/knn/index/mapper/LuceneFieldMapper.java
+++ b/src/main/java/org/opensearch/knn/index/mapper/LuceneFieldMapper.java
@@ -92,7 +92,7 @@ protected void parseCreateField(ParseContext context, int dimension, SpaceType s
             KnnByteVectorField point = new KnnByteVectorField(name(), array, fieldType);
 
             context.doc().add(point);
-            addStoredFieldForVectorField(context, fieldType, name(), point.toString());
+            addStoredFieldForVectorField(context, fieldType, name(), point);
 
             if (hasDocValues && vectorFieldType != null) {
                 context.doc().add(new VectorField(name(), array, vectorFieldType));
@@ -108,7 +108,7 @@ protected void parseCreateField(ParseContext context, int dimension, SpaceType s
             KnnVectorField point = new KnnVectorField(name(), array, fieldType);
 
             context.doc().add(point);
-            addStoredFieldForVectorField(context, fieldType, name(), point.toString());
+            addStoredFieldForVectorField(context, fieldType, name(), point);
 
             if (hasDocValues && vectorFieldType != null) {
                 context.doc().add(new VectorField(name(), array, vectorFieldType));