From 1d6c077156cf2e57f492b0456c6caf672c3cf7b8 Mon Sep 17 00:00:00 2001 From: Navneet Verma <navneev@amazon.com> Date: Fri, 12 Apr 2024 09:00:50 -0700 Subject: [PATCH] Remove unnecessary toString conversion of vector field and added some minor optimization in KNNCodec Signed-off-by: Navneet Verma <navneev@amazon.com> --- CHANGELOG.md | 1 + .../opensearch/knn/index/codec/util/KNNCodecUtil.java | 5 +++++ .../knn/index/mapper/KNNVectorFieldMapper.java | 4 ++-- .../knn/index/mapper/KNNVectorFieldMapperUtil.java | 10 +++------- .../opensearch/knn/index/mapper/LuceneFieldMapper.java | 4 ++-- 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e0812ab41..91a37642c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), * Make the HitQueue size more appropriate for exact search [#1549](https://github.com/opensearch-project/k-NN/pull/1549) * Support script score when doc value is disabled [#1573](https://github.com/opensearch-project/k-NN/pull/1573) * Implemented the Streaming Feature to stream vectors from Java to JNI layer to enable creation of larger segments for vector indices [#1604](https://github.com/opensearch-project/k-NN/pull/1604) +* Remove unnecessary toString conversion of vector field and added some minor optimization in KNNCodec [#1613](https://github.com/opensearch-project/k-NN/pull/1613) ### Bug Fixes ### Infrastructure * Add micro-benchmark module in k-NN plugin for benchmark streaming vectors to JNI layer functionality. 
[#1583](https://github.com/opensearch-project/k-NN/pull/1583) diff --git a/src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java b/src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java index c5ae469e0..e05962608 100644 --- a/src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java +++ b/src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java @@ -64,6 +64,11 @@ public static KNNCodecUtil.Pair getFloats(BinaryDocValues values) throws IOExcep if (vectorsPerTransfer == Integer.MIN_VALUE) { vectorsPerTransfer = (dimension * Float.BYTES * totalLiveDocs) / vectorsStreamingMemoryLimit; + // This condition occurs if vectorsStreamingMemoryLimit is higher than the total number of floats to transfer. + // Doing this will save 1 extra trip to the JNI layer. + if (vectorsPerTransfer == 0) { + vectorsPerTransfer = totalLiveDocs; + } } if (vectorList.size() == vectorsPerTransfer) { vectorAddress = JNICommons.storeVectorData( diff --git a/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapper.java b/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapper.java index a36a4222b..0fa026f34 100644 --- a/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapper.java +++ b/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapper.java @@ -561,7 +561,7 @@ protected void parseCreateField(ParseContext context, int dimension, SpaceType s VectorField point = new VectorField(name(), array, fieldType); context.doc().add(point); - addStoredFieldForVectorField(context, fieldType, name(), point.toString()); + addStoredFieldForVectorField(context, fieldType, name(), point); } else if (VectorDataType.FLOAT == vectorDataType) { Optional<float[]> floatsArrayOptional = getFloatsFromContext(context, dimension, methodComponentContext); @@ -572,7 +572,7 @@ protected void parseCreateField(ParseContext context, int dimension, SpaceType s spaceType.validateVector(array); VectorField point = new VectorField(name(), array, 
fieldType); context.doc().add(point); - addStoredFieldForVectorField(context, fieldType, name(), point.toString()); + addStoredFieldForVectorField(context, fieldType, name(), point); } else { throw new IllegalArgumentException( String.format(Locale.ROOT, "Cannot parse context for unsupported values provided for field [%s]", VECTOR_DATA_TYPE_FIELD) diff --git a/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapperUtil.java b/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapperUtil.java index 283d35f00..074be0375 100644 --- a/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapperUtil.java +++ b/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapperUtil.java @@ -13,6 +13,7 @@ import lombok.AccessLevel; import lombok.NoArgsConstructor; +import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.StoredField; import org.apache.lucene.index.DocValuesType; @@ -135,14 +136,9 @@ public static FieldType buildDocValuesFieldType(KNNEngine knnEngine) { return field; } - public static void addStoredFieldForVectorField( - ParseContext context, - FieldType fieldType, - String mapperName, - String vectorFieldAsString - ) { + public static void addStoredFieldForVectorField(ParseContext context, FieldType fieldType, String mapperName, Field vectorField) { if (fieldType.stored()) { - context.doc().add(new StoredField(mapperName, vectorFieldAsString)); + context.doc().add(new StoredField(mapperName, vectorField.toString())); } } } diff --git a/src/main/java/org/opensearch/knn/index/mapper/LuceneFieldMapper.java b/src/main/java/org/opensearch/knn/index/mapper/LuceneFieldMapper.java index 185ab3dc4..d61fa1150 100644 --- a/src/main/java/org/opensearch/knn/index/mapper/LuceneFieldMapper.java +++ b/src/main/java/org/opensearch/knn/index/mapper/LuceneFieldMapper.java @@ -92,7 +92,7 @@ protected void parseCreateField(ParseContext context, int dimension, SpaceType s KnnByteVectorField 
point = new KnnByteVectorField(name(), array, fieldType); context.doc().add(point); - addStoredFieldForVectorField(context, fieldType, name(), point.toString()); + addStoredFieldForVectorField(context, fieldType, name(), point); if (hasDocValues && vectorFieldType != null) { context.doc().add(new VectorField(name(), array, vectorFieldType)); @@ -108,7 +108,7 @@ protected void parseCreateField(ParseContext context, int dimension, SpaceType s KnnVectorField point = new KnnVectorField(name(), array, fieldType); context.doc().add(point); - addStoredFieldForVectorField(context, fieldType, name(), point.toString()); + addStoredFieldForVectorField(context, fieldType, name(), point); if (hasDocValues && vectorFieldType != null) { context.doc().add(new VectorField(name(), array, vectorFieldType));