From bff857d6b2995f5a80d54a02af54be42fe90d5f3 Mon Sep 17 00:00:00 2001 From: Varun Jain Date: Tue, 14 Jan 2025 11:39:28 -0800 Subject: [PATCH 1/5] Update pagination_depth datatype from int to Integer (#1094) * Update pagination_depth datatype from int to Integer Signed-off-by: Varun Jain --- .../opensearch/neuralsearch/query/HybridQuery.java | 3 ++- .../neuralsearch/query/HybridQueryBuilder.java | 14 +++++++++----- .../neuralsearch/query/HybridQueryContext.java | 4 +--- .../search/query/HybridCollectorManager.java | 1 + .../query/HybridQueryBuilderTests.java | 2 +- .../neuralsearch/query/HybridQueryTests.java | 2 +- 6 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/opensearch/neuralsearch/query/HybridQuery.java b/src/main/java/org/opensearch/neuralsearch/query/HybridQuery.java index f08120ce9..d1e339bd5 100644 --- a/src/main/java/org/opensearch/neuralsearch/query/HybridQuery.java +++ b/src/main/java/org/opensearch/neuralsearch/query/HybridQuery.java @@ -46,7 +46,8 @@ public HybridQuery(final Collection subQueries, final List filterQ if (subQueries.isEmpty()) { throw new IllegalArgumentException("collection of queries must not be empty"); } - if (hybridQueryContext.getPaginationDepth() == 0) { + Integer paginationDepth = hybridQueryContext.getPaginationDepth(); + if (Objects.nonNull(paginationDepth) && paginationDepth == 0) { throw new IllegalArgumentException("pagination_depth must not be zero"); } if (Objects.isNull(filterQueries) || filterQueries.isEmpty()) { diff --git a/src/main/java/org/opensearch/neuralsearch/query/HybridQueryBuilder.java b/src/main/java/org/opensearch/neuralsearch/query/HybridQueryBuilder.java index 55f6b4a96..bea94e603 100644 --- a/src/main/java/org/opensearch/neuralsearch/query/HybridQueryBuilder.java +++ b/src/main/java/org/opensearch/neuralsearch/query/HybridQueryBuilder.java @@ -55,7 +55,7 @@ public final class HybridQueryBuilder extends AbstractQueryBuilder queries = new ArrayList<>(); - private int paginationDepth; + private Integer paginationDepth; static final int MAX_NUMBER_OF_SUB_QUERIES = 5; private final static int DEFAULT_PAGINATION_DEPTH = 10; @@ -65,7 +65,7 @@ public HybridQueryBuilder(StreamInput in) throws IOException { super(in); queries.addAll(readQueries(in)); if (isClusterOnOrAfterMinReqVersionForPaginationInHybridQuery()) { - paginationDepth = in.readInt(); + paginationDepth = in.readOptionalInt(); } } @@ -78,7 +78,7 @@ public HybridQueryBuilder(StreamInput in) throws IOException { protected void doWriteTo(StreamOutput out) throws IOException { writeQueries(out, queries); if (isClusterOnOrAfterMinReqVersionForPaginationInHybridQuery()) { - out.writeInt(paginationDepth); + out.writeOptionalInt(paginationDepth); } } @@ -109,8 +109,9 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep queryBuilder.toXContent(builder, params); } builder.endArray(); - if (isClusterOnOrAfterMinReqVersionForPaginationInHybridQuery()) { - builder.field(PAGINATION_DEPTH_FIELD.getPreferredName(), paginationDepth == 0 ? DEFAULT_PAGINATION_DEPTH : paginationDepth); + // TODO https://github.com/opensearch-project/neural-search/issues/1097 + if (Objects.nonNull(paginationDepth)) { + builder.field(PAGINATION_DEPTH_FIELD.getPreferredName(), paginationDepth); } printBoostAndQueryName(builder); builder.endObject(); @@ -324,6 +325,9 @@ private Collection toQueries(Collection queryBuilders, Quer } private static void validatePaginationDepth(final int paginationDepth, final QueryShardContext queryShardContext) { + if (Objects.isNull(paginationDepth)) { + return; + } if (paginationDepth < LOWER_BOUND_OF_PAGINATION_DEPTH) { throw new IllegalArgumentException( String.format(Locale.ROOT, "pagination_depth should be greater than %s", LOWER_BOUND_OF_PAGINATION_DEPTH) diff --git a/src/main/java/org/opensearch/neuralsearch/query/HybridQueryContext.java b/src/main/java/org/opensearch/neuralsearch/query/HybridQueryContext.java index 3be741bcd..34706e6e7 100644 --- a/src/main/java/org/opensearch/neuralsearch/query/HybridQueryContext.java +++ b/src/main/java/org/opensearch/neuralsearch/query/HybridQueryContext.java @@ -6,7 +6,6 @@ import lombok.Builder; import lombok.Getter; -import lombok.NonNull; /** * Class that holds the low level information of hybrid query in the form of context @@ -14,6 +13,5 @@ @Builder @Getter public class HybridQueryContext { - @NonNull - private int paginationDepth; + private Integer paginationDepth; } diff --git a/src/main/java/org/opensearch/neuralsearch/search/query/HybridCollectorManager.java b/src/main/java/org/opensearch/neuralsearch/search/query/HybridCollectorManager.java index 03290f421..3c6a7271f 100644 --- a/src/main/java/org/opensearch/neuralsearch/search/query/HybridCollectorManager.java +++ b/src/main/java/org/opensearch/neuralsearch/search/query/HybridCollectorManager.java @@ -491,6 +491,7 @@ private static int getSubqueryResultsRetrievalSize(final SearchContext searchCon if (searchContext.from() == 0) { return searchContext.size(); } + log.info("pagination_depth is {}", paginationDepth); return paginationDepth; } diff --git a/src/test/java/org/opensearch/neuralsearch/query/HybridQueryBuilderTests.java b/src/test/java/org/opensearch/neuralsearch/query/HybridQueryBuilderTests.java index c22d174ec..a6cf4d29e 100644 --- a/src/test/java/org/opensearch/neuralsearch/query/HybridQueryBuilderTests.java +++ b/src/test/java/org/opensearch/neuralsearch/query/HybridQueryBuilderTests.java @@ -426,7 +426,7 @@ public void testFromXContent_whenMultipleSubQueries_thenBuildSuccessfully() { assertEquals(2, queryTwoSubQueries.queries().size()); assertTrue(queryTwoSubQueries.queries().get(0) instanceof NeuralQueryBuilder); assertTrue(queryTwoSubQueries.queries().get(1) instanceof TermQueryBuilder); - assertEquals(10, queryTwoSubQueries.paginationDepth()); + assertEquals(10, queryTwoSubQueries.paginationDepth().intValue()); // verify knn vector query NeuralQueryBuilder neuralQueryBuilder = (NeuralQueryBuilder) queryTwoSubQueries.queries().get(0); assertEquals(VECTOR_FIELD_NAME, neuralQueryBuilder.fieldName()); diff --git a/src/test/java/org/opensearch/neuralsearch/query/HybridQueryTests.java b/src/test/java/org/opensearch/neuralsearch/query/HybridQueryTests.java index 8e429e270..26babdbce 100644 --- a/src/test/java/org/opensearch/neuralsearch/query/HybridQueryTests.java +++ b/src/test/java/org/opensearch/neuralsearch/query/HybridQueryTests.java @@ -99,7 +99,7 @@ public void testQueryBasics_whenMultipleDifferentQueries_thenSuccessful() { countOfQueries++; } assertEquals(2, countOfQueries); - assertEquals(10, query3.getQueryContext().getPaginationDepth()); + assertEquals(10, query3.getQueryContext().getPaginationDepth().intValue()); } @SneakyThrows From c98b9c28126307af6e8e5e4a26f8b3ad470ccecf Mon Sep 17 00:00:00 2001 From: Heemin Kim Date: Wed, 15 Jan 2025 08:22:07 -0800 Subject: [PATCH 2/5] Use different package name between restart/rolling bwc tests (#1096) Signed-off-by: Heemin Kim --- qa/restart-upgrade/build.gradle | 40 +++++++++---------- .../AbstractRestartUpgradeRestTestCase.java | 2 +- .../bwc/{ => restart}/BatchIngestionIT.java | 2 +- .../bwc/{ => restart}/HybridSearchIT.java | 2 +- .../HybridSearchWithRescoreIT.java | 2 +- .../bwc/{ => restart}/KnnRadialSearchIT.java | 2 +- .../bwc/{ => restart}/MultiModalSearchIT.java | 2 +- .../NeuralQueryEnricherProcessorIT.java | 2 +- .../{ => restart}/NeuralSparseSearchIT.java | 2 +- .../NeuralSparseTwoPhaseProcessorIT.java | 2 +- .../bwc/{ => restart}/SemanticSearchIT.java | 2 +- .../TextChunkingProcessorIT.java | 2 +- .../AbstractRollingUpgradeTestCase.java | 2 +- .../bwc/{ => rolling}/BatchIngestionIT.java | 2 +- .../bwc/{ => rolling}/HybridSearchIT.java | 2 +- .../HybridSearchWithRescoreIT.java | 2 +- .../bwc/{ => rolling}/KnnRadialSearchIT.java | 2 +- .../bwc/{ => rolling}/MultiModalSearchIT.java | 2 +- .../NeuralQueryEnricherProcessorIT.java | 2 +- .../{ => rolling}/NeuralSparseSearchIT.java | 2 +- .../NeuralSparseTwoPhaseProcessorIT.java | 2 +- .../bwc/{ => rolling}/SemanticSearchIT.java | 2 +- .../TextChunkingProcessorIT.java | 2 +- 23 files changed, 42 insertions(+), 42 deletions(-) rename qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/{ => restart}/AbstractRestartUpgradeRestTestCase.java (99%) rename qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/{ => restart}/BatchIngestionIT.java (97%) rename qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/{ => restart}/HybridSearchIT.java (99%) rename qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/{ => restart}/HybridSearchWithRescoreIT.java (99%) rename qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/{ => restart}/KnnRadialSearchIT.java (98%) rename qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/{ => restart}/MultiModalSearchIT.java (98%) rename qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/{ => restart}/NeuralQueryEnricherProcessorIT.java (99%) rename qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/{ => restart}/NeuralSparseSearchIT.java (98%) rename qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/{ => restart}/NeuralSparseTwoPhaseProcessorIT.java (98%) rename qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/{ => restart}/SemanticSearchIT.java (98%) rename qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/{ => restart}/TextChunkingProcessorIT.java (98%) rename qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/{ => rolling}/AbstractRollingUpgradeTestCase.java (99%) rename qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/{ => rolling}/BatchIngestionIT.java (98%) rename qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/{ => rolling}/HybridSearchIT.java (99%) rename qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/{ => rolling}/HybridSearchWithRescoreIT.java (99%) rename qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/{ => rolling}/KnnRadialSearchIT.java (99%) rename qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/{ => rolling}/MultiModalSearchIT.java (98%) rename qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/{ => rolling}/NeuralQueryEnricherProcessorIT.java (99%) rename qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/{ => rolling}/NeuralSparseSearchIT.java (99%) rename qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/{ => rolling}/NeuralSparseTwoPhaseProcessorIT.java (98%) rename qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/{ => rolling}/SemanticSearchIT.java (98%) rename qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/{ => rolling}/TextChunkingProcessorIT.java (98%) diff --git a/qa/restart-upgrade/build.gradle b/qa/restart-upgrade/build.gradle index d0069d1a3..053866cf6 100644 --- a/qa/restart-upgrade/build.gradle +++ b/qa/restart-upgrade/build.gradle @@ -76,40 +76,40 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) { // because these features were released in 2.11 version. if (versionsBelow2_11.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { - excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*" - excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*" - excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseSearchIT.*" - excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.*" + excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.MultiModalSearchIT.*" + excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.HybridSearchIT.*" + excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.NeuralSparseSearchIT.*" + excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.NeuralQueryEnricherProcessorIT.*" } } // Excluding the these tests because we introduce them in 2.13 if (versionsBelow2_13.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { - excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow" - excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*" + excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow" + excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.TextChunkingProcessorIT.*" } } // Excluding the k-NN radial search tests because we introduce this feature in 2.14 if (versionsBelow2_14.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { - excludeTestsMatching "org.opensearch.neuralsearch.bwc.KnnRadialSearchIT.*" + excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.KnnRadialSearchIT.*" } } // Excluding the NeuralSparseQuery two-phase search pipeline tests because we introduce this feature in 2.15 if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { - excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*" - excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchWithRescoreIT.*" + excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.NeuralSparseTwoPhaseProcessorIT.*" + excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.HybridSearchWithRescoreIT.*" } } // Excluding the batching processor tests because we introduce this feature in 2.16 if (versionsBelow2_16.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { - excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*" + excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.BatchIngestionIT.*" } } @@ -141,40 +141,40 @@ task testAgainstNewCluster(type: StandaloneRestIntegTestTask) { // because these features were released in 2.11 version. if (versionsBelow2_11.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { - excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*" - excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*" - excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseSearchIT.*" - excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.*" + excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.MultiModalSearchIT.*" + excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.HybridSearchIT.*" + excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.NeuralSparseSearchIT.*" + excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.NeuralQueryEnricherProcessorIT.*" } } // Excluding these tests because we introduce them in 2.13 if (versionsBelow2_13.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { - excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow" - excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*" + excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow" + excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.TextChunkingProcessorIT.*" } } // Excluding the k-NN radial search tests because we introduce this feature in 2.14 if (versionsBelow2_14.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { - excludeTestsMatching "org.opensearch.neuralsearch.bwc.KnnRadialSearchIT.*" + excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.KnnRadialSearchIT.*" } } // Excluding the NeuralSparseQuery two-phase search pipeline tests because we introduce this feature in 2.15 if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { - excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*" - excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchWithRescoreIT.*" + excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.NeuralSparseTwoPhaseProcessorIT.*" + excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.HybridSearchWithRescoreIT.*" } } // Excluding the batch processor tests because we introduce this feature in 2.16 if (versionsBelow2_16.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { - excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*" + excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.BatchIngestionIT.*" } } diff --git a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRestartUpgradeRestTestCase.java b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/AbstractRestartUpgradeRestTestCase.java similarity index 99% rename from qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRestartUpgradeRestTestCase.java rename to qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/AbstractRestartUpgradeRestTestCase.java index 7028888ca..ca5a3e34e 100644 --- a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRestartUpgradeRestTestCase.java +++ b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/AbstractRestartUpgradeRestTestCase.java @@ -2,7 +2,7 @@ * Copyright OpenSearch Contributors * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.neuralsearch.bwc; +package org.opensearch.neuralsearch.bwc.restart; import java.nio.file.Files; import java.nio.file.Path; diff --git a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/BatchIngestionIT.java b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/BatchIngestionIT.java similarity index 97% rename from qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/BatchIngestionIT.java rename to qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/BatchIngestionIT.java index f9cd11251..7fa0b1301 100644 --- a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/BatchIngestionIT.java +++ b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/BatchIngestionIT.java @@ -2,7 +2,7 @@ * Copyright OpenSearch Contributors * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.neuralsearch.bwc; +package org.opensearch.neuralsearch.bwc.restart; import org.opensearch.neuralsearch.util.TestUtils; diff --git a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchIT.java b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/HybridSearchIT.java similarity index 99% rename from qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchIT.java rename to qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/HybridSearchIT.java index 56ffec24a..d08d208da 100644 --- a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchIT.java +++ b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/HybridSearchIT.java @@ -2,7 +2,7 @@ * Copyright OpenSearch Contributors * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.neuralsearch.bwc; +package org.opensearch.neuralsearch.bwc.restart; import java.io.IOException; import java.nio.file.Files; diff --git a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchWithRescoreIT.java b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/HybridSearchWithRescoreIT.java similarity index 99% rename from qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchWithRescoreIT.java rename to qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/HybridSearchWithRescoreIT.java index fb8596905..9329a934b 100644 --- a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchWithRescoreIT.java +++ b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/HybridSearchWithRescoreIT.java @@ -2,7 +2,7 @@ * Copyright OpenSearch Contributors * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.neuralsearch.bwc; +package org.opensearch.neuralsearch.bwc.restart; import java.nio.file.Files; import java.nio.file.Path; diff --git a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/KnnRadialSearchIT.java b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/KnnRadialSearchIT.java similarity index 98% rename from qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/KnnRadialSearchIT.java rename to qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/KnnRadialSearchIT.java index 838d7ae8a..2e3ac34ab 100644 --- a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/KnnRadialSearchIT.java +++ b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/KnnRadialSearchIT.java @@ -2,7 +2,7 @@ * Copyright OpenSearch Contributors * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.neuralsearch.bwc; +package org.opensearch.neuralsearch.bwc.restart; import java.nio.file.Files; import java.nio.file.Path; diff --git a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/MultiModalSearchIT.java b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/MultiModalSearchIT.java similarity index 98% rename from qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/MultiModalSearchIT.java rename to qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/MultiModalSearchIT.java index 7c098ef42..df3f8e94e 100644 --- a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/MultiModalSearchIT.java +++ b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/MultiModalSearchIT.java @@ -2,7 +2,7 @@ * Copyright OpenSearch Contributors * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.neuralsearch.bwc; +package org.opensearch.neuralsearch.bwc.restart; import java.nio.file.Files; import java.nio.file.Path; diff --git a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralQueryEnricherProcessorIT.java b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/NeuralQueryEnricherProcessorIT.java similarity index 99% rename from qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralQueryEnricherProcessorIT.java rename to qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/NeuralQueryEnricherProcessorIT.java index e9005f0d6..97182bfa0 100644 --- a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralQueryEnricherProcessorIT.java +++ b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/NeuralQueryEnricherProcessorIT.java @@ -2,7 +2,7 @@ * Copyright OpenSearch Contributors * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.neuralsearch.bwc; +package org.opensearch.neuralsearch.bwc.restart; import static org.opensearch.neuralsearch.util.TestUtils.NODES_BWC_CLUSTER; import static org.opensearch.neuralsearch.util.TestUtils.SPARSE_ENCODING_PROCESSOR; diff --git a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseSearchIT.java b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/NeuralSparseSearchIT.java similarity index 98% rename from qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseSearchIT.java rename to qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/NeuralSparseSearchIT.java index 8ec54711a..5978f71ab 100644 --- a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseSearchIT.java +++ b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/NeuralSparseSearchIT.java @@ -2,7 +2,7 @@ * Copyright OpenSearch Contributors * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.neuralsearch.bwc; +package org.opensearch.neuralsearch.bwc.restart; import java.nio.file.Files; import java.nio.file.Path; diff --git a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseTwoPhaseProcessorIT.java b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/NeuralSparseTwoPhaseProcessorIT.java similarity index 98% rename from qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseTwoPhaseProcessorIT.java rename to qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/NeuralSparseTwoPhaseProcessorIT.java index 4b00a7916..6a6994809 100644 --- a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseTwoPhaseProcessorIT.java +++ b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/NeuralSparseTwoPhaseProcessorIT.java @@ -2,7 +2,7 @@ * Copyright OpenSearch Contributors * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.neuralsearch.bwc; +package org.opensearch.neuralsearch.bwc.restart; import org.opensearch.common.settings.Settings; import org.opensearch.neuralsearch.query.NeuralSparseQueryBuilder; diff --git a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/SemanticSearchIT.java b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/SemanticSearchIT.java similarity index 98% rename from qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/SemanticSearchIT.java rename to qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/SemanticSearchIT.java index dcbf58ed8..9dc35c02d 100644 --- a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/SemanticSearchIT.java +++ b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/SemanticSearchIT.java @@ -2,7 +2,7 @@ * Copyright OpenSearch Contributors * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.neuralsearch.bwc; +package org.opensearch.neuralsearch.bwc.restart; import java.nio.file.Files; import java.nio.file.Path; diff --git a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/TextChunkingProcessorIT.java b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/TextChunkingProcessorIT.java similarity index 98% rename from qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/TextChunkingProcessorIT.java rename to qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/TextChunkingProcessorIT.java index ba44eba9a..d68903478 100644 --- a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/TextChunkingProcessorIT.java +++ b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/restart/TextChunkingProcessorIT.java @@ -2,7 +2,7 @@ * Copyright OpenSearch Contributors * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.neuralsearch.bwc; +package org.opensearch.neuralsearch.bwc.restart; import java.net.URL; import java.nio.file.Files; diff --git a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRollingUpgradeTestCase.java b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/AbstractRollingUpgradeTestCase.java similarity index 99% rename from qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRollingUpgradeTestCase.java rename to qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/AbstractRollingUpgradeTestCase.java index 48688938c..a1ad7178c 100644 --- a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRollingUpgradeTestCase.java +++ b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/AbstractRollingUpgradeTestCase.java @@ -2,7 +2,7 @@ * Copyright OpenSearch Contributors * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.neuralsearch.bwc; +package org.opensearch.neuralsearch.bwc.rolling; import java.nio.file.Files; import java.nio.file.Path; diff --git a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/BatchIngestionIT.java b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/BatchIngestionIT.java similarity index 98% rename from qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/BatchIngestionIT.java rename to qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/BatchIngestionIT.java index aef3e323a..01fce83f0 100644 --- a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/BatchIngestionIT.java +++ b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/BatchIngestionIT.java @@ -2,7 +2,7 @@ * Copyright OpenSearch Contributors * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.neuralsearch.bwc; +package org.opensearch.neuralsearch.bwc.rolling; import org.opensearch.neuralsearch.util.TestUtils; diff --git a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchIT.java b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/HybridSearchIT.java similarity index 99% rename from qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchIT.java rename to qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/HybridSearchIT.java index 44671ed4a..de7ddef55 100644 --- a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchIT.java +++ b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/HybridSearchIT.java @@ -2,7 +2,7 @@ * Copyright OpenSearch Contributors * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.neuralsearch.bwc; +package org.opensearch.neuralsearch.bwc.rolling; import java.nio.file.Files; import java.nio.file.Path; diff --git a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchWithRescoreIT.java b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/HybridSearchWithRescoreIT.java similarity index 99% rename from qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchWithRescoreIT.java rename to qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/HybridSearchWithRescoreIT.java index 4526f6be0..3cea2400d 100644 --- a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchWithRescoreIT.java +++ b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/HybridSearchWithRescoreIT.java @@ -2,7 +2,7 @@ * Copyright OpenSearch Contributors * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.neuralsearch.bwc; +package org.opensearch.neuralsearch.bwc.rolling; import org.opensearch.index.query.MatchQueryBuilder; import org.opensearch.index.query.QueryBuilder; diff --git a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/KnnRadialSearchIT.java b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/KnnRadialSearchIT.java similarity index 99% rename from qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/KnnRadialSearchIT.java rename to qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/KnnRadialSearchIT.java index 88af8b757..e9fb1a4a7 100644 --- a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/KnnRadialSearchIT.java +++ b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/KnnRadialSearchIT.java @@ -2,7 +2,7 @@ * Copyright OpenSearch Contributors * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.neuralsearch.bwc; +package org.opensearch.neuralsearch.bwc.rolling; import java.nio.file.Files; import java.nio.file.Path; diff --git a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/MultiModalSearchIT.java b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/MultiModalSearchIT.java similarity index 98% rename from qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/MultiModalSearchIT.java rename to qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/MultiModalSearchIT.java index e2df88d6d..6ced20d95 100644 --- a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/MultiModalSearchIT.java +++ b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/MultiModalSearchIT.java @@ -2,7 +2,7 @@ * Copyright OpenSearch Contributors * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.neuralsearch.bwc; +package org.opensearch.neuralsearch.bwc.rolling; import java.nio.file.Files; import java.nio.file.Path; diff --git a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralQueryEnricherProcessorIT.java b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/NeuralQueryEnricherProcessorIT.java similarity index 99% rename from qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralQueryEnricherProcessorIT.java rename to qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/NeuralQueryEnricherProcessorIT.java index 58e362470..e22e65e4a 100644 --- a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralQueryEnricherProcessorIT.java +++ b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/NeuralQueryEnricherProcessorIT.java @@ -2,7 +2,7 @@ * Copyright OpenSearch Contributors * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.neuralsearch.bwc; +package org.opensearch.neuralsearch.bwc.rolling; import org.opensearch.common.settings.Settings; import org.opensearch.neuralsearch.util.TestUtils; diff --git a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseSearchIT.java b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/NeuralSparseSearchIT.java similarity index 99% rename from qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseSearchIT.java rename to qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/NeuralSparseSearchIT.java index 3a27d0271..8cebae4a2 100644 --- a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseSearchIT.java +++ b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/NeuralSparseSearchIT.java @@ -2,7 +2,7 @@ * Copyright OpenSearch Contributors * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.neuralsearch.bwc; +package org.opensearch.neuralsearch.bwc.rolling; import java.nio.file.Files; import java.nio.file.Path; diff --git a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseTwoPhaseProcessorIT.java b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/NeuralSparseTwoPhaseProcessorIT.java similarity index 98% rename from qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseTwoPhaseProcessorIT.java rename to qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/NeuralSparseTwoPhaseProcessorIT.java index c95ee93e0..5338b777f 100644 --- a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseTwoPhaseProcessorIT.java +++ b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/NeuralSparseTwoPhaseProcessorIT.java @@ -2,7 +2,7 @@ * Copyright OpenSearch Contributors * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.neuralsearch.bwc; +package org.opensearch.neuralsearch.bwc.rolling; import org.opensearch.common.settings.Settings; import org.opensearch.neuralsearch.query.NeuralSparseQueryBuilder; diff --git a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/SemanticSearchIT.java b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/SemanticSearchIT.java similarity index 98% rename from qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/SemanticSearchIT.java rename to qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/SemanticSearchIT.java index e30411709..989d53897 100644 --- a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/SemanticSearchIT.java +++ b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/SemanticSearchIT.java @@ -2,7 +2,7 @@ * Copyright OpenSearch Contributors * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.neuralsearch.bwc; +package org.opensearch.neuralsearch.bwc.rolling; import java.nio.file.Files; import java.nio.file.Path; diff --git a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/TextChunkingProcessorIT.java b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/TextChunkingProcessorIT.java similarity index 98% rename from qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/TextChunkingProcessorIT.java rename to qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/TextChunkingProcessorIT.java index f2451c480..29ec1fed5 100644 --- a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/TextChunkingProcessorIT.java +++ b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/rolling/TextChunkingProcessorIT.java @@ -2,7 +2,7 @@ * Copyright OpenSearch Contributors * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.neuralsearch.bwc; +package org.opensearch.neuralsearch.bwc.rolling; import java.net.URL; import java.nio.file.Files; From 3130de36cb2ec6d08faa6f5301cee5ad69d359bd Mon Sep 17 00:00:00 2001 From: yuye-aws Date: Thu, 16 Jan 2025 09:05:28 +0800 Subject: [PATCH 3/5] Enhance: Add comments to chunking-related code (#1106) * add logs for chunker parameter parser Signed-off-by: yuye-aws * add logs for delimiter chunker Signed-off-by: yuye-aws * add comments for delimiter chunker Signed-off-by: yuye-aws * update comments for delimiter chunker Signed-off-by: yuye-aws * update comments for chunker interface Signed-off-by: yuye-aws * add comments for chunker facdtory Signed-off-by: yuye-aws * update comments for chunker interface Signed-off-by: yuye-aws --------- Signed-off-by: yuye-aws --- .../processor/chunker/Chunker.java | 8 ++ .../processor/chunker/ChunkerFactory.java | 8 ++ .../chunker/ChunkerParameterParser.java | 73 +++++++++++++------ .../processor/chunker/DelimiterChunker.java | 8 ++ .../chunker/FixedTokenLengthChunker.java | 15 +++- 5 files changed, 90 insertions(+), 22 deletions(-) diff --git a/src/main/java/org/opensearch/neuralsearch/processor/chunker/Chunker.java b/src/main/java/org/opensearch/neuralsearch/processor/chunker/Chunker.java index 3fa2eeb7c..f8f496291 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/chunker/Chunker.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/chunker/Chunker.java @@ -13,9 +13,16 @@ */ public interface Chunker { + /** Field name for specifying the maximum chunk limit in the configuration. */ String MAX_CHUNK_LIMIT_FIELD = "max_chunk_limit"; + + /** Field name for tracking the count of chunked strings. */ String CHUNK_STRING_COUNT_FIELD = "chunk_string_count"; + + /** Default maximum number of chunks allowed (100). */ int DEFAULT_MAX_CHUNK_LIMIT = 100; + + /** Special value (-1) indicating that chunk limiting is disabled. */ int DISABLED_MAX_CHUNK_LIMIT = -1; /** @@ -42,6 +49,7 @@ public interface Chunker { * @param chunkResultSize the size of chunking result * @param runtimeMaxChunkLimit runtime max_chunk_limit, used to check with chunkResultSize * @param chunkStringCount runtime chunk_string_count, used to check with chunkResultSize + * @return true if adding the new chunks would exceed the limit, false otherwise */ static boolean checkRunTimeMaxChunkLimit(int chunkResultSize, int runtimeMaxChunkLimit, int chunkStringCount) { return runtimeMaxChunkLimit != DISABLED_MAX_CHUNK_LIMIT && chunkResultSize + chunkStringCount >= runtimeMaxChunkLimit; diff --git a/src/main/java/org/opensearch/neuralsearch/processor/chunker/ChunkerFactory.java b/src/main/java/org/opensearch/neuralsearch/processor/chunker/ChunkerFactory.java index aab9eaa3e..6b4ddd594 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/chunker/ChunkerFactory.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/chunker/ChunkerFactory.java @@ -25,8 +25,16 @@ private ChunkerFactory() {} // no instance of this factory class DelimiterChunker::new ); + /** Set of supported chunker algorithm types */ public static Set CHUNKER_ALGORITHMS = CHUNKERS_CONSTRUCTORS.keySet(); + /** + * Creates a new Chunker instance based on the specified type and parameters. + * + * @param type the type of chunker to create + * @param parameters configuration parameters for the chunker + * @return a new Chunker instance configured with the given parameters + */ public static Chunker create(final String type, final Map parameters) { Function, Chunker> chunkerConstructionFunction = CHUNKERS_CONSTRUCTORS.get(type); // chunkerConstructionFunction is not null because we have validated the type in text chunking processor diff --git a/src/main/java/org/opensearch/neuralsearch/processor/chunker/ChunkerParameterParser.java b/src/main/java/org/opensearch/neuralsearch/processor/chunker/ChunkerParameterParser.java index 52d8eef00..a26371651 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/chunker/ChunkerParameterParser.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/chunker/ChunkerParameterParser.java @@ -19,8 +19,12 @@ public final class ChunkerParameterParser { private ChunkerParameterParser() {} // no instance of this util class /** - * Parse String type parameter. - * Throw IllegalArgumentException if parameter is not a string or an empty string. + * Parses and validates a string parameter from the parameters map. + * + * @param parameters The map containing chunking parameters + * @param fieldName The name of the field to extract from the parameters map + * @return The validated string value from the parameters map + * @throws IllegalArgumentException if the parameter is not a string or is empty */ public static String parseString(final Map parameters, final String fieldName) { Object fieldValue = parameters.get(fieldName); @@ -36,9 +40,13 @@ public static String parseString(final Map parameters, final Str } /** - * Parse String type parameter. - * Return default value if the parameter is missing. - * Throw IllegalArgumentException if parameter is not a string or an empty string. + * Parses and validates a string parameter from the parameters map with fallback to a default value. + * + * @param parameters The map containing chunking parameters + * @param fieldName The name of the field to extract from the parameters map + * @param defaultValue The default value to return if the parameter is not present + * @return The validated string value from the parameters map if present, otherwise the default value + * @throws IllegalArgumentException if the parameter is present but is not a string or is empty */ public static String parseStringWithDefault(final Map parameters, final String fieldName, final String defaultValue) { if (!parameters.containsKey(fieldName)) { @@ -49,8 +57,12 @@ public static String parseStringWithDefault(final Map parameters } /** - * Parse integer type parameter with default value. - * Throw IllegalArgumentException if the parameter is not an integer. + * Parses and validates an integer value from the parameters map. + * + * @param parameters The map containing chunking parameters + * @param fieldName The name of the field to extract from the parameters map + * @return The parsed integer value from the parameters map + * @throws IllegalArgumentException if the parameter is not an integer or is empty */ public static int parseInteger(final Map parameters, final String fieldName) { String fieldValueString = parameters.get(fieldName).toString(); @@ -64,9 +76,13 @@ public static int parseInteger(final Map parameters, final Strin } /** - * Parse integer type parameter with default value. - * Return default value if the parameter is missing. - * Throw IllegalArgumentException if the parameter is not an integer. + * Parses and validates an integer parameter from the parameters map with fallback to a default value. + * + * @param parameters The map containing chunking parameters + * @param fieldName The name of the field to extract from the parameters map + * @param defaultValue The default value to return if the parameter is not present + * @return The integer value from the parameters map if present, otherwise the default value + * @throws IllegalArgumentException if the parameter is present but cannot be converted to an integer */ public static int parseIntegerWithDefault(final Map parameters, final String fieldName, final int defaultValue) { if (!parameters.containsKey(fieldName)) { @@ -77,9 +93,12 @@ public static int parseIntegerWithDefault(final Map parameters, } /** - * Parse integer type parameter with positive value. - * Return default value if the parameter is missing. - * Throw IllegalArgumentException if the parameter is not a positive integer. + * Parses and validates a positive integer parameter from the parameters map. + * + * @param parameters The map containing chunking parameters + * @param fieldName The name of the field to extract from the parameters map + * @return The parsed positive integer value + * @throws IllegalArgumentException if the parameter is not a positive integer or cannot be converted to an integer */ public static int parsePositiveInteger(final Map parameters, final String fieldName) { int fieldValueInt = parseInteger(parameters, fieldName); @@ -90,9 +109,13 @@ public static int parsePositiveInteger(final Map parameters, fin } /** - * Parse integer type parameter with positive value. - * Return default value if the parameter is missing. - * Throw IllegalArgumentException if the parameter is not a positive integer. + * Parses and validates a positive integer parameter from the parameters map with fallback to a default value. + * + * @param parameters The map containing chunking parameters + * @param fieldName The name of the field to extract from the parameters map + * @param defaultValue The default value to return if the parameter is not present + * @return The positive integer value from the parameters map if present, otherwise the default value + * @throws IllegalArgumentException if the parameter is present but is not a positive integer */ public static int parsePositiveIntegerWithDefault( final Map parameters, @@ -107,8 +130,12 @@ public static int parsePositiveIntegerWithDefault( } /** - * Parse double type parameter. - * Throw IllegalArgumentException if parameter is not a double. + * Parses and validates a double value from the parameters map. + * + * @param parameters The map containing chunking parameters + * @param fieldName The name of the field to extract from the parameters map + * @return The parsed double value + * @throws IllegalArgumentException if the parameter cannot be converted to a double */ public static double parseDouble(final Map parameters, final String fieldName) { String fieldValueString = parameters.get(fieldName).toString(); @@ -122,9 +149,13 @@ public static double parseDouble(final Map parameters, final Str } /** - * Parse double type parameter. - * Return default value if the parameter is missing. - * Throw IllegalArgumentException if parameter is not a double. + * Parses and validates a double value from the parameters map with fallback to a default value. + * + * @param parameters The map containing chunking parameters + * @param fieldName The name of the field to extract from the parameters map + * @param defaultValue The default value to return if the parameter is not present + * @return The double value from the parameters map if present, otherwise the default value + * @throws IllegalArgumentException if the parameter is present but cannot be converted to a double */ public static double parseDoubleWithDefault(final Map parameters, final String fieldName, final double defaultValue) { if (!parameters.containsKey(fieldName)) { diff --git a/src/main/java/org/opensearch/neuralsearch/processor/chunker/DelimiterChunker.java b/src/main/java/org/opensearch/neuralsearch/processor/chunker/DelimiterChunker.java index 0f3d66c55..0cee22d97 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/chunker/DelimiterChunker.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/chunker/DelimiterChunker.java @@ -16,14 +16,22 @@ */ public final class DelimiterChunker implements Chunker { + /** The identifier for the delimiter chunking algorithm. */ public static final String ALGORITHM_NAME = "delimiter"; + /** The parameter field name for specifying the delimiter. */ public static final String DELIMITER_FIELD = "delimiter"; + /** The default delimiter value used when none is specified. Uses two consecutive newline characters to split on paragraph boundaries. */ public static final String DEFAULT_DELIMITER = "\n\n"; + /** The delimiter string used for text chunking. */ private String delimiter; + /** + * Constructor that initializes the delimiter chunker with the specified parameters. + * @param parameters a map with non-runtime parameters to be parsed + */ public DelimiterChunker(final Map parameters) { parseParameters(parameters); } diff --git a/src/main/java/org/opensearch/neuralsearch/processor/chunker/FixedTokenLengthChunker.java b/src/main/java/org/opensearch/neuralsearch/processor/chunker/FixedTokenLengthChunker.java index 614ea33f9..3b364814a 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/chunker/FixedTokenLengthChunker.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/chunker/FixedTokenLengthChunker.java @@ -24,13 +24,22 @@ */ public final class FixedTokenLengthChunker implements Chunker { + /** The identifier for the fixed token length chunking algorithm. */ public static final String ALGORITHM_NAME = "fixed_token_length"; - // field name for each parameter + /** Field name for the analysis registry configuration parameter. */ public static final String ANALYSIS_REGISTRY_FIELD = "analysis_registry"; + + /** Field name for specifying the maximum number of tokens per chunk. */ public static final String TOKEN_LIMIT_FIELD = "token_limit"; + + /** Field name for specifying the overlap rate between consecutive chunks. */ public static final String OVERLAP_RATE_FIELD = "overlap_rate"; + + /** Field name for specifying the maximum token count allowed in the input text. */ public static final String MAX_TOKEN_COUNT_FIELD = "max_token_count"; + + /** Field name for specifying the tokenizer to be used for text analysis. */ public static final String TOKENIZER_FIELD = "tokenizer"; // default values for each non-runtime parameter @@ -57,6 +66,10 @@ public final class FixedTokenLengthChunker implements Chunker { private double overlapRate; private final AnalysisRegistry analysisRegistry; + /** + * Constructor that initializes the fixed token length chunker with the specified parameters. + * @param parameters a map with non-runtime parameters to be parsed + */ public FixedTokenLengthChunker(final Map parameters) { parseParameters(parameters); this.analysisRegistry = (AnalysisRegistry) parameters.get(ANALYSIS_REGISTRY_FIELD); From 7e01c5effc59d0bbf1b743346f7cf43ed640ce68 Mon Sep 17 00:00:00 2001 From: Brian Flores Date: Thu, 16 Jan 2025 08:16:55 -0800 Subject: [PATCH 4/5] ByField Rerank Improvements (#969) * fix: better exception message Signed-off-by: Brian Flores --- .../rerank/ByFieldRerankProcessor.java | 31 ++++-- .../processor/util/ProcessorUtils.java | 29 ++++++ .../rerank/ByFieldRerankProcessorTests.java | 95 ++++++++++++++++++- 3 files changed, 143 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/opensearch/neuralsearch/processor/rerank/ByFieldRerankProcessor.java b/src/main/java/org/opensearch/neuralsearch/processor/rerank/ByFieldRerankProcessor.java index 28bf7866f..bd13b78b1 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/rerank/ByFieldRerankProcessor.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/rerank/ByFieldRerankProcessor.java @@ -23,6 +23,7 @@ import static org.opensearch.neuralsearch.processor.util.ProcessorUtils.getScoreFromSourceMap; import static org.opensearch.neuralsearch.processor.util.ProcessorUtils.getValueFromSource; +import static org.opensearch.neuralsearch.processor.util.ProcessorUtils.isNumeric; import static org.opensearch.neuralsearch.processor.util.ProcessorUtils.mappingExistsInSource; import static org.opensearch.neuralsearch.processor.util.ProcessorUtils.removeTargetFieldFromSource; import static org.opensearch.neuralsearch.processor.util.ProcessorUtils.validateRerankCriteria; @@ -113,7 +114,6 @@ public void rescoreSearchResponse( final ActionListener> listener ) { SearchHit[] searchHits = response.getHits().getHits(); - SearchHitValidator searchHitValidator = this::byFieldSearchHitValidator; if (!validateRerankCriteria(searchHits, searchHitValidator, listener)) { @@ -162,26 +162,41 @@ public void rescoreSearchResponse( */ public void byFieldSearchHitValidator(final SearchHit hit) { if (!hit.hasSource()) { - log.error(String.format(Locale.ROOT, "There is no source field to be able to perform rerank on hit [%d]", hit.docId())); + log.error(String.format(Locale.ROOT, "There is no source field to be able to perform rerank on hit [%s]", hit.getId())); throw new IllegalArgumentException( - String.format(Locale.ROOT, "There is no source field to be able to perform rerank on hit [%d]", hit.docId()) + String.format(Locale.ROOT, "There is no source field to be able to perform rerank on hit [%s]", hit.getId()) ); } Map sourceMap = hit.getSourceAsMap(); if (!mappingExistsInSource(sourceMap, targetField)) { - log.error(String.format(Locale.ROOT, "The field to rerank [%s] is not found at hit [%d]", targetField, hit.docId())); + log.error(String.format(Locale.ROOT, "The field to rerank [%s] is not found at hit [%s]", targetField, hit.getId())); - throw new IllegalArgumentException(String.format(Locale.ROOT, "The field to rerank by is not found at hit [%d]", hit.docId())); + throw new IllegalArgumentException(String.format(Locale.ROOT, "The field to rerank by is not found at hit [%s]", hit.getId())); } Optional val = getValueFromSource(sourceMap, targetField); - if (!(val.get() instanceof Number)) { - log.error(String.format(Locale.ROOT, "The field mapping to rerank [%s: %s] is not Numerical", targetField, val.orElse(null))); + if (!(isNumeric(val.get()))) { + // Strictly get the type of value removing the prefix of getClass() having a value is guaranteed so no NPE check + String typeOfMapping = val.get().getClass().getSimpleName(); + log.error( + String.format( + Locale.ROOT, + "The field mapping to rerank [%s: %s] is not Numerical, instead of type [%s]", + targetField, + val.orElse(null), + typeOfMapping + ) + ); throw new IllegalArgumentException( - String.format(Locale.ROOT, "The field mapping to rerank by [%s] is not Numerical", val.orElse(null)) + String.format( + Locale.ROOT, + "The field mapping to rerank by [%s] is not Numerical, instead of type [%s]", + val.orElse(null), + typeOfMapping + ) ); } diff --git a/src/main/java/org/opensearch/neuralsearch/processor/util/ProcessorUtils.java b/src/main/java/org/opensearch/neuralsearch/processor/util/ProcessorUtils.java index a6a377843..d799f323f 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/util/ProcessorUtils.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/util/ProcessorUtils.java @@ -47,6 +47,7 @@ public interface SearchHitValidator { * for each SearchHit follows the correct form as specified by the validator. * When just one of the conditions fail (as specified by the validator) the exception will be thrown to the listener. * @param searchHits from the SearchResponse + * @param validator The given validator used to check every search hit being correct * @param listener returns an error to the listener in case on of the conditions fail * @return The status indicating that the SearchHits are in correct form to perform the Rerank */ @@ -77,6 +78,9 @@ public static boolean validateRerankCriteria( */ public static float getScoreFromSourceMap(final Map sourceAsMap, final String targetField) { Object val = getValueFromSource(sourceAsMap, targetField).get(); + if (val instanceof String) { + return Float.parseFloat((String) val); + } return ((Number) val).floatValue(); } @@ -180,4 +184,29 @@ public static boolean mappingExistsInSource(final Map sourceAsMa return getValueFromSource(sourceAsMap, pathToValue).isPresent(); } + /** + * @param value Any value to be determined to be numerical + * @return whether the value can be turned into a number + */ + public static boolean isNumeric(Object value) { + if (value == null) { + return false; + } + + if (value instanceof Number) { + return true; + } + + if (value instanceof String) { + String string = (String) value; + try { + Double.parseDouble(string); + return true; + } catch (NumberFormatException e) { + return false; + } + } + + return false; + } } diff --git a/src/test/java/org/opensearch/neuralsearch/processor/rerank/ByFieldRerankProcessorTests.java b/src/test/java/org/opensearch/neuralsearch/processor/rerank/ByFieldRerankProcessorTests.java index a2555663d..2e9fdd05f 100644 --- a/src/test/java/org/opensearch/neuralsearch/processor/rerank/ByFieldRerankProcessorTests.java +++ b/src/test/java/org/opensearch/neuralsearch/processor/rerank/ByFieldRerankProcessorTests.java @@ -755,6 +755,90 @@ public void testRerank_keepsTargetFieldAndHasNoPreviousScore_WhenByFieldHasDefau } } + public void testRerank_reranksHits_WhenTargetFieldIsNumericalString() throws IOException { + String targetField = "ml.info.score"; + setUpValidSearchResultsWithNestedTargetValueWithNumericalString(); + List> sortedScoresDescending = sampleIndexMLScorePairs.stream() + .sorted(Map.Entry.comparingByValue().reversed()) + .toList(); + + Map config = new HashMap<>( + Map.of(RerankType.BY_FIELD.getLabel(), new HashMap<>(Map.of(ByFieldRerankProcessor.TARGET_FIELD, targetField))) + ); + processor = (ByFieldRerankProcessor) factory.create( + Map.of(), + "rerank processor", + "processor for 2nd level reranking based on provided field, This will check a nested field and numerical string", + false, + config, + pipelineContext + ); + ActionListener listener = mock(ActionListener.class); + processor.rerank(response, Map.of(), listener); + ArgumentCaptor argCaptor = ArgumentCaptor.forClass(SearchResponse.class); + + verify(listener, times(1)).onResponse(argCaptor.capture()); + SearchResponse searchResponse = argCaptor.getValue(); + + assertEquals(sampleIndexMLScorePairs.size(), searchResponse.getHits().getHits().length); + assertEquals(sortedScoresDescending.getFirst().getValue(), searchResponse.getHits().getMaxScore(), 0.0001); + + for (int i = 0; i < sortedScoresDescending.size(); i++) { + int docId = sortedScoresDescending.get(i).getKey(); + float ml_score = sortedScoresDescending.get(i).getValue(); + assertEquals(docId, searchResponse.getHits().getAt(i).docId()); + assertEquals(ml_score, searchResponse.getHits().getAt(i).getScore(), 0.001); + + // Test that the path to targetField is valid + Map currentMap = searchResponse.getHits().getAt(i).getSourceAsMap(); + String[] keys = targetField.split("\\."); + String lastKey = keys[keys.length - 1]; + for (int keyIndex = 0; keyIndex < keys.length - 1; keyIndex++) { + String key = keys[keyIndex]; + assertTrue("The key:" + key + "does not exist in" + currentMap, currentMap.containsKey(key)); + currentMap = (Map) currentMap.get(key); + } + assertTrue("The key:" + lastKey + "does not exist in" + currentMap, currentMap.containsKey(lastKey)); + + } + } + + /** + * Setups a search response that has a target field with a numerical string for example "3.2" + * Which can be used by the processor to rerank documents. + */ + private void setUpValidSearchResultsWithNestedTargetValueWithNumericalString() { + SearchHit[] hits = new SearchHit[sampleIndexMLScorePairs.size()]; + + String templateString = """ + { + "my_field" : "%s", + "ml": { + "info" : { + "score": "%s" + } + } + } + """.replace("\n", ""); + + for (int i = 0; i < sampleIndexMLScorePairs.size(); i++) { + int docId = sampleIndexMLScorePairs.get(i).getKey(); + String mlScore = sampleIndexMLScorePairs.get(i).getValue() + ""; + + String sourceMap = templateString.formatted(i, mlScore); + + hits[i] = new SearchHit(docId, docId + "", Collections.emptyMap(), Collections.emptyMap()); + hits[i].sourceRef(new BytesArray(sourceMap)); + hits[i].score(1); + } + + TotalHits totalHits = new TotalHits(sampleIndexMLScorePairs.size(), TotalHits.Relation.EQUAL_TO); + + SearchHits searchHits = new SearchHits(hits, totalHits, 1.0f); + SearchResponseSections internal = new SearchResponseSections(searchHits, null, null, false, false, null, 0); + response = new SearchResponse(internal, null, 1, 1, 0, 1, new ShardSearchFailure[0], new SearchResponse.Clusters(1, 1, 0), null); + } + /** * Creates a searchResponse where the value to reRank by is Nested. * The location where the target is within a map of size 1 meaning after @@ -869,7 +953,7 @@ public void testRerank_throwsExceptionOnNoSource_WhenSearchResponseHasNoSourceMa ArgumentCaptor argumentCaptor = ArgumentCaptor.forClass(Exception.class); verify(listener, times(1)).onFailure(argumentCaptor.capture()); - assertEquals("There is no source field to be able to perform rerank on hit [" + 1 + "]", argumentCaptor.getValue().getMessage()); + assertEquals("There is no source field to be able to perform rerank on hit [" + 2 + "]", argumentCaptor.getValue().getMessage()); assert (argumentCaptor.getValue() instanceof IllegalArgumentException); } @@ -907,7 +991,7 @@ public void testRerank_throwsExceptionOnMappingNotExistingInSource_WhenSearchRes ArgumentCaptor argumentCaptor = ArgumentCaptor.forClass(Exception.class); verify(listener, times(1)).onFailure(argumentCaptor.capture()); - assertEquals("The field to rerank by is not found at hit [" + 1 + "]", argumentCaptor.getValue().getMessage()); + assertEquals("The field to rerank by is not found at hit [" + 2 + "]", argumentCaptor.getValue().getMessage()); assert (argumentCaptor.getValue() instanceof IllegalArgumentException); } @@ -947,7 +1031,7 @@ public void testRerank_throwsExceptionOnHavingEmptyMapping_WhenTargetFieldHasNul ArgumentCaptor argumentCaptor = ArgumentCaptor.forClass(Exception.class); verify(listener, times(1)).onFailure(argumentCaptor.capture()); - assertEquals("The field to rerank by is not found at hit [" + 1 + "]", argumentCaptor.getValue().getMessage()); + assertEquals("The field to rerank by is not found at hit [" + 2 + "]", argumentCaptor.getValue().getMessage()); assert (argumentCaptor.getValue() instanceof IllegalArgumentException); } @@ -985,7 +1069,10 @@ public void testRerank_throwsExceptionOnHavingNonNumericValue_WhenTargetFieldHas ArgumentCaptor argumentCaptor = ArgumentCaptor.forClass(Exception.class); verify(listener, times(1)).onFailure(argumentCaptor.capture()); - assertEquals("The field mapping to rerank by [hello world] is not Numerical", argumentCaptor.getValue().getMessage()); + assertEquals( + "The field mapping to rerank by [hello world] is not Numerical, instead of type [String]", + argumentCaptor.getValue().getMessage() + ); assert (argumentCaptor.getValue() instanceof IllegalArgumentException); } From 660f577c68d7c90d636a2d8a1002c57b0bd97f5b Mon Sep 17 00:00:00 2001 From: Ryan Bogan Date: Thu, 16 Jan 2025 11:18:49 -0800 Subject: [PATCH 5/5] Remove duplicate method call in NormalizationProcessorWorkflow (#1110) Signed-off-by: Ryan Bogan --- .../neuralsearch/processor/NormalizationProcessorWorkflow.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/opensearch/neuralsearch/processor/NormalizationProcessorWorkflow.java b/src/main/java/org/opensearch/neuralsearch/processor/NormalizationProcessorWorkflow.java index 51f30f842..1ddfe75a6 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/NormalizationProcessorWorkflow.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/NormalizationProcessorWorkflow.java @@ -62,7 +62,7 @@ public class NormalizationProcessorWorkflow { public void execute(final NormalizationProcessorWorkflowExecuteRequest request) { List querySearchResults = request.getQuerySearchResults(); Optional fetchSearchResultOptional = request.getFetchSearchResultOptional(); - List unprocessedDocIds = unprocessedDocIds(request.getQuerySearchResults()); + List unprocessedDocIds = unprocessedDocIds(querySearchResults); // pre-process data log.debug("Pre-process query results");