From 7160f6e3dfb4ada53921dca77a1378d9bdf7b658 Mon Sep 17 00:00:00 2001 From: Vijayan Balasubramanian Date: Tue, 3 Dec 2024 13:16:46 -0800 Subject: [PATCH 1/4] Validate knn params only for version created from 2.17 Before 2.17, this validatin was not added, hence, users can set method params and model id eventhough knn index settings was set to false. This commit will apply validation only for index created from 2.17 onwards. Signed-off-by: Vijayan Balasubramanian --- .../opensearch/knn/index/mapper/KNNVectorFieldMapper.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapper.java b/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapper.java index 6e5138a56..6d7a26d0f 100644 --- a/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapper.java +++ b/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapper.java @@ -250,7 +250,7 @@ public KNNVectorFieldMapper build(BuilderContext context) { ); } - if (originalParameters.getResolvedKnnMethodContext() == null) { + if (originalParameters.getResolvedKnnMethodContext() == null && context.indexCreatedVersion().onOrAfter(Version.V_2_17_0)) { return FlatVectorFieldMapper.createFieldMapper( buildFullName(context), name, @@ -362,9 +362,10 @@ public Mapper.Builder parse(String name, Map node, ParserCont String.format(Locale.ROOT, "Method and model can not be both specified in the mapping: %s", name) ); } - // Check for flat configuration - if (isKNNDisabled(parserContext.getSettings())) { + if (isKNNDisabled(parserContext.getSettings()) && parserContext.indexVersionCreated().onOrAfter(Version.V_2_17_0)) { + // on and after 2_17_0 we validate to makes sure that mapping doesn't contain parameters that are + // specific to approximate knn search algorithms validateFromFlat(builder); } else if (builder.modelId.get() != null) { validateFromModel(builder); From d465fd908f865b8ef5e083657b8770e363c15245 Mon Sep 17 00:00:00 2001 From: Vijayan Balasubramanian Date: Tue, 3 Dec 2024 13:18:22 -0800 Subject: [PATCH 2/4] Added unit and integtest Signed-off-by: Vijayan Balasubramanian --- .../opensearch/knn/index/OpenSearchIT.java | 23 +++++++++++++++++++ .../org/opensearch/knn/KNNRestTestCase.java | 16 +++++++++++++ 2 files changed, 39 insertions(+) diff --git a/src/test/java/org/opensearch/knn/index/OpenSearchIT.java b/src/test/java/org/opensearch/knn/index/OpenSearchIT.java index c1d3b47c3..1b491da02 100644 --- a/src/test/java/org/opensearch/knn/index/OpenSearchIT.java +++ b/src/test/java/org/opensearch/knn/index/OpenSearchIT.java @@ -483,4 +483,27 @@ public void testIndexingVectorValidation_updateVectorWithNull() throws Exception assertArrayEquals(vectorForDocumentOne, vectorRestoreInitialValue); } + public void testCreateNonKNNIndex_withKNNModelID() throws Exception { + Settings settings = Settings.builder().put(createKNNDefaultScriptScoreSettings()).build(); + ResponseException ex = expectThrows( + ResponseException.class, + () -> createKnnIndex(INDEX_NAME, settings, createKnnIndexMapping(FIELD_NAME, "random-model-id")) + ); + String expMessage = "Cannot set modelId or method parameters when index.knn setting is false"; + assertThat(EntityUtils.toString(ex.getResponse().getEntity()), containsString(expMessage)); + } + + public void testCreateNonKNNIndex_withKNNMethodParams() throws Exception { + Settings settings = Settings.builder().put(createKNNDefaultScriptScoreSettings()).build(); + ResponseException ex = expectThrows( + ResponseException.class, + () -> createKnnIndex( + INDEX_NAME, + settings, + createKnnIndexMapping(FIELD_NAME, 2, "hnsw", KNNEngine.FAISS.getName(), SpaceType.DEFAULT.getValue(), false) + ) + ); + String expMessage = "Cannot set modelId or method parameters when index.knn setting is false"; + assertThat(EntityUtils.toString(ex.getResponse().getEntity()), containsString(expMessage)); + } } diff --git a/src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java b/src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java index a4659f691..5ef1c7cc2 100644 --- a/src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java +++ b/src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java @@ -400,6 +400,22 @@ protected String createKnnIndexMapping(final String fieldName, final Integer dim .toString(); } + /** + * Utility to create a Knn Index Mapping with model id + */ + protected String createKnnIndexMapping(String fieldName, String modelId) throws IOException { + return XContentFactory.jsonBuilder() + .startObject() + .startObject("properties") + .startObject(fieldName) + .field("type", "knn_vector") + .field("model_id", modelId) + .endObject() + .endObject() + .endObject() + .toString(); + } + /** * Utility to create a Knn Index Mapping with specific algorithm and engine */ From d6d6e72625afb6339a49eeeee3fde9b519d0f4ff Mon Sep 17 00:00:00 2001 From: Vijayan Balasubramanian Date: Tue, 3 Dec 2024 13:18:39 -0800 Subject: [PATCH 3/4] Add BWC test Signed-off-by: Vijayan Balasubramanian --- .../java/org/opensearch/knn/bwc/ModelIT.java | 2 +- .../opensearch/knn/bwc/ScriptScoringIT.java | 129 ++++++++++++++++++ 2 files changed, 130 insertions(+), 1 deletion(-) diff --git a/qa/restart-upgrade/src/test/java/org/opensearch/knn/bwc/ModelIT.java b/qa/restart-upgrade/src/test/java/org/opensearch/knn/bwc/ModelIT.java index 7df651a3c..29a908887 100644 --- a/qa/restart-upgrade/src/test/java/org/opensearch/knn/bwc/ModelIT.java +++ b/qa/restart-upgrade/src/test/java/org/opensearch/knn/bwc/ModelIT.java @@ -168,7 +168,7 @@ public void searchKNNModel(String testModelID) throws IOException { XContentParser parser = createParser(XContentType.JSON.xContent(), responseBody); SearchResponse searchResponse = SearchResponse.fromXContent(parser); assertNotNull(searchResponse); - assertEquals(EXP_NUM_OF_MODELS, searchResponse.getHits().getHits().length); + assertTrue(EXP_NUM_OF_MODELS <= searchResponse.getHits().getHits().length); for (SearchHit hit : searchResponse.getHits().getHits()) { assertTrue(hit.getId().startsWith(testModelID)); diff --git a/qa/restart-upgrade/src/test/java/org/opensearch/knn/bwc/ScriptScoringIT.java b/qa/restart-upgrade/src/test/java/org/opensearch/knn/bwc/ScriptScoringIT.java index 2932f32fb..fa75eb039 100644 --- a/qa/restart-upgrade/src/test/java/org/opensearch/knn/bwc/ScriptScoringIT.java +++ b/qa/restart-upgrade/src/test/java/org/opensearch/knn/bwc/ScriptScoringIT.java @@ -8,21 +8,34 @@ import org.apache.http.util.EntityUtils; import org.opensearch.client.Request; import org.opensearch.client.Response; +import org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.common.xcontent.XContentType; +import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.index.query.MatchAllQueryBuilder; import org.opensearch.index.query.QueryBuilder; import org.opensearch.knn.IDVectorProducer; import org.opensearch.knn.KNNResult; import org.opensearch.knn.index.SpaceType; import org.opensearch.core.rest.RestStatus; +import org.opensearch.knn.index.engine.KNNEngine; +import java.io.IOException; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import static org.opensearch.knn.TestUtils.FIELD; +import static org.opensearch.knn.TestUtils.KNN_BWC_PREFIX; import static org.opensearch.knn.TestUtils.QUERY_VALUE; +import static org.opensearch.knn.common.KNNConstants.FAISS_NAME; +import static org.opensearch.knn.common.KNNConstants.KNN_ENGINE; +import static org.opensearch.knn.common.KNNConstants.METHOD_IVF; +import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_NLIST; import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_SPACE_TYPE; +import static org.opensearch.knn.common.KNNConstants.MODEL_ID; +import static org.opensearch.knn.common.KNNConstants.NAME; +import static org.opensearch.knn.common.KNNConstants.PARAMETERS; public class ScriptScoringIT extends AbstractRestartUpgradeTestCase { private static final String TEST_FIELD = "test-field"; @@ -31,6 +44,10 @@ public class ScriptScoringIT extends AbstractRestartUpgradeTestCase { private static final int K = 5; private static final int NUM_DOCS = 10; private static int QUERY_COUNT = 0; + private static final String TRAINING_INDEX_DEFAULT = KNN_BWC_PREFIX + "train-index-default-1"; + private static final String TRAINING_FIELD = "train-field"; + private static final String TEST_MODEL_ID_DEFAULT = "test-model-id-default-1"; + private static final String MODEL_DESCRIPTION = "Description for train model test"; // KNN script scoring for space_type "l2" public void testKNNL2ScriptScore() throws Exception { @@ -122,4 +139,116 @@ private void validateKNNInnerProductScriptScoreSearch(String testIndex, String t } } + public void testNonKNNIndex_withMethodParams_withFaissEngine() throws Exception { + if (isRunningAgainstOldCluster()) { + createKnnIndex( + testIndex, + createKNNDefaultScriptScoreSettings(), + createKnnIndexMapping(TEST_FIELD, DIMENSIONS, "hnsw", KNNEngine.FAISS.getName(), SpaceType.DEFAULT.getValue(), false) + ); + addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, DOC_ID, NUM_DOCS); + } else { + QUERY_COUNT = NUM_DOCS; + DOC_ID = NUM_DOCS; + validateKNNScriptScoreSearch(testIndex, TEST_FIELD, DIMENSIONS, QUERY_COUNT, K, SpaceType.L2); + addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, DOC_ID, NUM_DOCS); + QUERY_COUNT = QUERY_COUNT + NUM_DOCS; + validateKNNScriptScoreSearch(testIndex, TEST_FIELD, DIMENSIONS, QUERY_COUNT, K, SpaceType.L2); + deleteKNNIndex(testIndex); + } + } + + public void testNonKNNIndex_withMethodParams_withLuceneEngine() throws Exception { + if (isRunningAgainstOldCluster()) { + createKnnIndex( + testIndex, + createKNNDefaultScriptScoreSettings(), + createKnnIndexMapping(TEST_FIELD, DIMENSIONS, "hnsw", KNNEngine.LUCENE.getName(), SpaceType.DEFAULT.getValue(), false) + ); + addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, DOC_ID, NUM_DOCS); + } else { + QUERY_COUNT = NUM_DOCS; + DOC_ID = NUM_DOCS; + validateKNNScriptScoreSearch(testIndex, TEST_FIELD, DIMENSIONS, QUERY_COUNT, K, SpaceType.L2); + addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, DOC_ID, NUM_DOCS); + forceMergeKnnIndex(testIndex, 1); + QUERY_COUNT = QUERY_COUNT + NUM_DOCS; + validateKNNScriptScoreSearch(testIndex, TEST_FIELD, DIMENSIONS, QUERY_COUNT, K, SpaceType.L2); + deleteKNNIndex(testIndex); + } + } + + public void testNonKNNIndex_withMethodParams_withNMSLIBEngine() throws Exception { + if (isRunningAgainstOldCluster()) { + createKnnIndex( + testIndex, + createKNNDefaultScriptScoreSettings(), + createKnnIndexMapping(TEST_FIELD, DIMENSIONS, "hnsw", KNNEngine.NMSLIB.getName(), SpaceType.DEFAULT.getValue(), false) + ); + addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, DOC_ID, NUM_DOCS); + } else { + QUERY_COUNT = NUM_DOCS; + DOC_ID = NUM_DOCS; + validateKNNScriptScoreSearch(testIndex, TEST_FIELD, DIMENSIONS, QUERY_COUNT, K, SpaceType.L2); + addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, DOC_ID, NUM_DOCS); + forceMergeKnnIndex(testIndex, 1); + QUERY_COUNT = QUERY_COUNT + NUM_DOCS; + validateKNNScriptScoreSearch(testIndex, TEST_FIELD, DIMENSIONS, QUERY_COUNT, K, SpaceType.L2); + deleteKNNIndex(testIndex); + } + } + + public void testNonKNNIndex_withModelId() throws Exception { + if (isRunningAgainstOldCluster()) { + // Create a training index and randomly ingest data into it + createBasicKnnIndex(TRAINING_INDEX_DEFAULT, TRAINING_FIELD, DIMENSIONS); + bulkIngestRandomVectors(TRAINING_INDEX_DEFAULT, TRAINING_FIELD, NUM_DOCS, DIMENSIONS); + + trainKNNModel(TEST_MODEL_ID_DEFAULT, TRAINING_INDEX_DEFAULT, TRAINING_FIELD, DIMENSIONS, MODEL_DESCRIPTION); + validateModelCreated(TEST_MODEL_ID_DEFAULT); + + createKnnIndex(testIndex, createKNNDefaultScriptScoreSettings(), createKnnIndexMapping(TEST_FIELD, TEST_MODEL_ID_DEFAULT)); + addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, DOC_ID, NUM_DOCS); + } else { + QUERY_COUNT = NUM_DOCS; + DOC_ID = NUM_DOCS; + validateKNNScriptScoreSearch(testIndex, TEST_FIELD, DIMENSIONS, QUERY_COUNT, K, SpaceType.L2); + addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, DOC_ID, NUM_DOCS); + QUERY_COUNT = QUERY_COUNT + NUM_DOCS; + validateKNNScriptScoreSearch(testIndex, TEST_FIELD, DIMENSIONS, QUERY_COUNT, K, SpaceType.L2); + deleteKNNIndex(testIndex); + deleteModel(TEST_MODEL_ID_DEFAULT); + } + } + + // train KNN model + // method : "ivf", engine : "faiss", space_type : "l2", nlists : 1 + public void trainKNNModel(String modelId, String trainingIndexName, String trainingFieldName, int dimension, String description) + throws IOException { + XContentBuilder builder = XContentFactory.jsonBuilder() + .startObject() + .field(NAME, METHOD_IVF) + .field(KNN_ENGINE, FAISS_NAME) + .field(METHOD_PARAMETER_SPACE_TYPE, SpaceType.L2.getValue()) + .startObject(PARAMETERS) + .field(METHOD_PARAMETER_NLIST, 1) + .endObject() + .endObject(); + Map method = xContentBuilderToMap(builder); + + Response trainResponse = trainModel(modelId, trainingIndexName, trainingFieldName, dimension, method, description); + assertEquals(RestStatus.OK, RestStatus.fromCode(trainResponse.getStatusLine().getStatusCode())); + } + + // Confirm that the model gets created using Get Model API + public void validateModelCreated(String modelId) throws Exception { + Response getResponse = getModel(modelId, null); + String responseBody = EntityUtils.toString(getResponse.getEntity()); + assertNotNull(responseBody); + + Map responseMap = createParser(XContentType.JSON.xContent(), responseBody).map(); + assertEquals(modelId, responseMap.get(MODEL_ID)); + assertTrainingSucceeds(modelId, NUM_OF_ATTEMPTS, DELAY_MILLI_SEC); + } + } From 2bc2d6be4ec10d0887ee835e6dbb1a65a7732bec Mon Sep 17 00:00:00 2001 From: Vijayan Balasubramanian Date: Tue, 3 Dec 2024 13:18:56 -0800 Subject: [PATCH 4/4] update workflow Signed-off-by: Vijayan Balasubramanian --- .github/workflows/CI.yml | 16 +++++++++------- .../backwards_compatibility_tests_workflow.yml | 2 +- .github/workflows/test_security.yml | 11 ++++++----- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 932ce8022..e7b4b811d 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -30,8 +30,7 @@ on: - 'jni/**' - 'micro-benchmarks/**' - '.github/workflows/CI.yml' -env: - ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true + jobs: Get-CI-Image-Tag: @@ -52,11 +51,13 @@ jobs: # this image tag is subject to change as more dependencies and updates will arrive over time image: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-version-linux }} # need to switch to root so that github actions can install runner binary on container without permission issues. - options: --user root + options: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-options }} steps: + - name: Run start commands + run: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-command }} - name: Checkout k-NN - uses: actions/checkout@v1 + uses: actions/checkout@v4 # Setup git user so that patches for native libraries can be applied and committed - name: Setup git user @@ -65,8 +66,9 @@ jobs: su `id -un 1000` -c 'git config --global user.email "github-actions[bot]@users.noreply.github.com"' - name: Setup Java ${{ matrix.java }} - uses: actions/setup-java@v1 + uses: actions/setup-java@v4 with: + distribution: "corretto" java-version: ${{ matrix.java }} - name: Run build @@ -84,7 +86,7 @@ jobs: - name: Upload Coverage Report - uses: codecov/codecov-action@v1 + uses: codecov/codecov-action@v4 with: token: ${{ secrets.CODECOV_TOKEN }} @@ -99,7 +101,7 @@ jobs: steps: - name: Checkout k-NN - uses: actions/checkout@v1 + uses: actions/checkout@v4 # Setup git user so that patches for native libraries can be applied and committed - name: Setup git user diff --git a/.github/workflows/backwards_compatibility_tests_workflow.yml b/.github/workflows/backwards_compatibility_tests_workflow.yml index 76b3caff5..ac5a2b3b8 100644 --- a/.github/workflows/backwards_compatibility_tests_workflow.yml +++ b/.github/workflows/backwards_compatibility_tests_workflow.yml @@ -34,7 +34,7 @@ jobs: strategy: matrix: java: [ 11, 17 ] - bwc_version : [ "1.1.0", "1.2.4", "1.3.8", "2.0.1", "2.1.0", "2.2.1", "2.3.0", "2.4.1", "2.5.0", "2.6.0", "2.7.0", "2.8.0", "2.9.0", "2.10.0", "2.11.0", "2.12.0", "2.13.0", "2.14.0", "2.15.0", "2.16.0", "2.17.0" ] + bwc_version : [ "2.15.0", "2.16.0" ] opensearch_version : [ "2.17.1-SNAPSHOT" ] name: k-NN Restart-Upgrade BWC Tests diff --git a/.github/workflows/test_security.yml b/.github/workflows/test_security.yml index 2f8df8526..aefb555c4 100644 --- a/.github/workflows/test_security.yml +++ b/.github/workflows/test_security.yml @@ -28,8 +28,6 @@ on: - 'gradle/**' - 'jni/**' - '.github/workflows/test_security.yml' -env: - ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true jobs: Get-CI-Image-Tag: @@ -50,11 +48,13 @@ jobs: # this image tag is subject to change as more dependencies and updates will arrive over time image: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-version-linux }} # need to switch to root so that github actions can install runner binary on container without permission issues. - options: --user root + options: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-options }} steps: + - name: Run start commands + run: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-command }} - name: Checkout k-NN - uses: actions/checkout@v1 + uses: actions/checkout@v4 # Setup git user so that patches for native libraries can be applied and committed - name: Setup git user run: | @@ -62,9 +62,10 @@ jobs: su `id -un 1000` -c 'git config --global user.email "github-actions[bot]@users.noreply.github.com"' - name: Setup Java ${{ matrix.java }} - uses: actions/setup-java@v1 + uses: actions/setup-java@v4 with: java-version: ${{ matrix.java }} + distribution: "corretto" - name: Run build # switching the user, as OpenSearch cluster can only be started as root/Administrator on linux-deb/linux-rpm/windows-zip.