Skip to content

Commit

Permalink
Merge branch 'main' into analyzer_dev
Browse files Browse the repository at this point in the history
  • Loading branch information
zhichao-aws committed Jan 17, 2025
2 parents 6cc80d7 + 660f577 commit cea345d
Show file tree
Hide file tree
Showing 38 changed files with 291 additions and 88 deletions.
40 changes: 20 additions & 20 deletions qa/restart-upgrade/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -76,40 +76,40 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {
// because these features were released in 2.11 version.
if (versionsBelow2_11.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.MultiModalSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.HybridSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.NeuralSparseSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.NeuralQueryEnricherProcessorIT.*"
}
}

// Excluding these tests because we introduce them in 2.13
if (versionsBelow2_13.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.TextChunkingProcessorIT.*"
}
}

// Excluding the k-NN radial search tests because we introduce this feature in 2.14
if (versionsBelow2_14.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.KnnRadialSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.KnnRadialSearchIT.*"
}
}

// Excluding the NeuralSparseQuery two-phase search pipeline tests because we introduce this feature in 2.15
if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchWithRescoreIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.NeuralSparseTwoPhaseProcessorIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.HybridSearchWithRescoreIT.*"
}
}

// Excluding the batching processor tests because we introduce this feature in 2.16
if (versionsBelow2_16.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.BatchIngestionIT.*"
}
}

Expand Down Expand Up @@ -141,40 +141,40 @@ task testAgainstNewCluster(type: StandaloneRestIntegTestTask) {
// because these features were released in 2.11 version.
if (versionsBelow2_11.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.MultiModalSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.HybridSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.NeuralSparseSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.NeuralQueryEnricherProcessorIT.*"
}
}

// Excluding these tests because we introduce them in 2.13
if (versionsBelow2_13.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.TextChunkingProcessorIT.*"
}
}

// Excluding the k-NN radial search tests because we introduce this feature in 2.14
if (versionsBelow2_14.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.KnnRadialSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.KnnRadialSearchIT.*"
}
}

// Excluding the NeuralSparseQuery two-phase search pipeline tests because we introduce this feature in 2.15
if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchWithRescoreIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.NeuralSparseTwoPhaseProcessorIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.HybridSearchWithRescoreIT.*"
}
}

// Excluding the batch processor tests because we introduce this feature in 2.16
if (versionsBelow2_16.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.restart.BatchIngestionIT.*"
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;
package org.opensearch.neuralsearch.bwc.restart;

import java.nio.file.Files;
import java.nio.file.Path;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;
package org.opensearch.neuralsearch.bwc.restart;

import org.opensearch.neuralsearch.util.TestUtils;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;
package org.opensearch.neuralsearch.bwc.restart;

import java.io.IOException;
import java.nio.file.Files;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;
package org.opensearch.neuralsearch.bwc.restart;

import java.nio.file.Files;
import java.nio.file.Path;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;
package org.opensearch.neuralsearch.bwc.restart;

import java.nio.file.Files;
import java.nio.file.Path;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;
package org.opensearch.neuralsearch.bwc.restart;

import java.nio.file.Files;
import java.nio.file.Path;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;
package org.opensearch.neuralsearch.bwc.restart;

import static org.opensearch.neuralsearch.util.TestUtils.NODES_BWC_CLUSTER;
import static org.opensearch.neuralsearch.util.TestUtils.SPARSE_ENCODING_PROCESSOR;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;
package org.opensearch.neuralsearch.bwc.restart;

import java.nio.file.Files;
import java.nio.file.Path;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;
package org.opensearch.neuralsearch.bwc.restart;

import org.opensearch.common.settings.Settings;
import org.opensearch.neuralsearch.query.NeuralSparseQueryBuilder;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;
package org.opensearch.neuralsearch.bwc.restart;

import java.nio.file.Files;
import java.nio.file.Path;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;
package org.opensearch.neuralsearch.bwc.restart;

import java.net.URL;
import java.nio.file.Files;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;
package org.opensearch.neuralsearch.bwc.rolling;

import java.nio.file.Files;
import java.nio.file.Path;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;
package org.opensearch.neuralsearch.bwc.rolling;

import org.opensearch.neuralsearch.util.TestUtils;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;
package org.opensearch.neuralsearch.bwc.rolling;

import java.nio.file.Files;
import java.nio.file.Path;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;
package org.opensearch.neuralsearch.bwc.rolling;

import org.opensearch.index.query.MatchQueryBuilder;
import org.opensearch.index.query.QueryBuilder;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;
package org.opensearch.neuralsearch.bwc.rolling;

import java.nio.file.Files;
import java.nio.file.Path;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;
package org.opensearch.neuralsearch.bwc.rolling;

import java.nio.file.Files;
import java.nio.file.Path;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;
package org.opensearch.neuralsearch.bwc.rolling;

import org.opensearch.common.settings.Settings;
import org.opensearch.neuralsearch.util.TestUtils;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;
package org.opensearch.neuralsearch.bwc.rolling;

import java.nio.file.Files;
import java.nio.file.Path;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;
package org.opensearch.neuralsearch.bwc.rolling;

import org.opensearch.common.settings.Settings;
import org.opensearch.neuralsearch.query.NeuralSparseQueryBuilder;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;
package org.opensearch.neuralsearch.bwc.rolling;

import java.nio.file.Files;
import java.nio.file.Path;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;
package org.opensearch.neuralsearch.bwc.rolling;

import java.net.URL;
import java.nio.file.Files;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ public class NormalizationProcessorWorkflow {
public void execute(final NormalizationProcessorWorkflowExecuteRequest request) {
List<QuerySearchResult> querySearchResults = request.getQuerySearchResults();
Optional<FetchSearchResult> fetchSearchResultOptional = request.getFetchSearchResultOptional();
List<Integer> unprocessedDocIds = unprocessedDocIds(request.getQuerySearchResults());
List<Integer> unprocessedDocIds = unprocessedDocIds(querySearchResults);

// pre-process data
log.debug("Pre-process query results");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,16 @@
*/
public interface Chunker {

/** Field name for specifying the maximum chunk limit in the configuration. */
String MAX_CHUNK_LIMIT_FIELD = "max_chunk_limit";

/** Field name for tracking the count of chunked strings. */
String CHUNK_STRING_COUNT_FIELD = "chunk_string_count";

/** Default maximum number of chunks allowed (100). */
int DEFAULT_MAX_CHUNK_LIMIT = 100;

/** Special value (-1) indicating that chunk limiting is disabled. */
int DISABLED_MAX_CHUNK_LIMIT = -1;

/**
Expand All @@ -42,6 +49,7 @@ public interface Chunker {
* @param chunkResultSize the size of chunking result
* @param runtimeMaxChunkLimit runtime max_chunk_limit, used to check with chunkResultSize
* @param chunkStringCount runtime chunk_string_count, used to check with chunkResultSize
* @return true if chunk limiting is enabled and adding the new chunks would reach or exceed the limit, false otherwise
*/
static boolean checkRunTimeMaxChunkLimit(int chunkResultSize, int runtimeMaxChunkLimit, int chunkStringCount) {
return runtimeMaxChunkLimit != DISABLED_MAX_CHUNK_LIMIT && chunkResultSize + chunkStringCount >= runtimeMaxChunkLimit;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,16 @@ private ChunkerFactory() {} // no instance of this factory class
DelimiterChunker::new
);

/** Set of supported chunker algorithm types */
public static Set<String> CHUNKER_ALGORITHMS = CHUNKERS_CONSTRUCTORS.keySet();

/**
* Creates a new Chunker instance based on the specified type and parameters.
*
* @param type the type of chunker to create
* @param parameters configuration parameters for the chunker
* @return a new Chunker instance configured with the given parameters
*/
public static Chunker create(final String type, final Map<String, Object> parameters) {
Function<Map<String, Object>, Chunker> chunkerConstructionFunction = CHUNKERS_CONSTRUCTORS.get(type);
// chunkerConstructionFunction is not null because we have validated the type in text chunking processor
Expand Down
Loading

0 comments on commit cea345d

Please sign in to comment.