Skip to content

Commit

Permalink
Update default engine to FAISS (#2221) (#2258)
Browse files Browse the repository at this point in the history
* Update default engine to FAISS

Since faiss supports more features than nmslib, and, we had seen
data points that there are more number of vector search
users are interesed in faiss, we will be updating default
engine to be faiss. This will benefit users who preffered
to use defaults while working with vector search.

Signed-off-by: Vijayan Balasubramanian <[email protected]>

* Update legacy mapping

Signed-off-by: Vijayan Balasubramanian <[email protected]>

* Create legacy mapping only up to V_2_17_2

Signed-off-by: Vijayan Balasubramanian <[email protected]>

* Update test engine

Signed-off-by: Vijayan Balasubramanian <[email protected]>

* Update test method

Signed-off-by: Vijayan Balasubramanian <[email protected]>

---------

Signed-off-by: Vijayan Balasubramanian <[email protected]>
(cherry picked from commit 7d34456)

Co-authored-by: Vijayan Balasubramanian <[email protected]>
  • Loading branch information
opensearch-trigger-bot[bot] and VijayanB authored Nov 8, 2024
1 parent 2e603a1 commit 1873211
Show file tree
Hide file tree
Showing 19 changed files with 129 additions and 77 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,23 @@ public void testKNNIndexDefaultLegacyFieldMapping() throws Exception {
}
}

// ensure that index is created using legacy mapping in old cluster, and, then, add docs to both old and new cluster.
// when search is requested on new cluster it should return all docs irrespective of cluster.
public void testKNNIndexDefaultEngine() throws Exception {
waitForClusterHealthGreen(NODES_BWC_CLUSTER);
if (isRunningAgainstOldCluster()) {
createKnnIndex(testIndex, getKNNDefaultIndexSettings(), createKnnIndexMapping(TEST_FIELD, DIMENSIONS));
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, DOC_ID, 5);
// Flush to ensure that index is not re-indexed when node comes back up
flush(testIndex, true);
} else {
validateKNNSearch(testIndex, TEST_FIELD, DIMENSIONS, 5, 5);
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, 5, 5);
validateKNNSearch(testIndex, TEST_FIELD, DIMENSIONS, 10, 10);
deleteKNNIndex(testIndex);
}
}

// Ensure that when segments created with old mapping are forcemerged in new cluster, they
// succeed
public void testKNNIndexDefaultLegacyFieldMappingForceMerge() throws Exception {
Expand Down
1 change: 1 addition & 0 deletions release-notes/opensearch-knn.release-notes-2.18.0.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Compatible with OpenSearch 2.18.0
* Add CompressionLevel Calculation for PQ [#2200](https://github.com/opensearch-project/k-NN/pull/2200)
* Remove FSDirectory dependency from native engine constructing side and deprecated FileWatcher [#2182](https://github.com/opensearch-project/k-NN/pull/2182)
* Update approximate_threshold to 15K documents [#2229](https://github.com/opensearch-project/k-NN/pull/2229)
* Update default engine to FAISS [#2221](https://github.com/opensearch-project/k-NN/pull/2221)
### Bug Fixes
* Add DocValuesProducers for releasing memory when close index [#1946](https://github.com/opensearch-project/k-NN/pull/1946)
* KNN80DocValues should only be considered for BinaryDocValues fields [#2147](https://github.com/opensearch-project/k-NN/pull/2147)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public KNNEngine resolveEngine(

// For 1x, we need to default to faiss if mode is provided and use nmslib otherwise
if (CompressionLevel.isConfigured(compressionLevel) == false || compressionLevel == CompressionLevel.x1) {
return mode == Mode.ON_DISK ? KNNEngine.FAISS : KNNEngine.DEFAULT;
return mode == Mode.ON_DISK ? KNNEngine.FAISS : KNNEngine.NMSLIB;
}

// Lucene is only engine that supports 4x - so we have to default to it here.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public enum KNNEngine implements KNNLibrary {
FAISS(FAISS_NAME, Faiss.INSTANCE),
LUCENE(LUCENE_NAME, Lucene.INSTANCE);

public static final KNNEngine DEFAULT = NMSLIB;
public static final KNNEngine DEFAULT = FAISS;

private static final Set<KNNEngine> CUSTOM_SEGMENT_FILE_ENGINES = ImmutableSet.of(KNNEngine.NMSLIB, KNNEngine.FAISS);
private static final Set<KNNEngine> ENGINES_SUPPORTING_FILTERS = ImmutableSet.of(KNNEngine.LUCENE, KNNEngine.FAISS);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -500,8 +500,7 @@ private void resolveKNNMethodComponents(
.build()
);

// If the original parameters are from legacy
if (builder.originalParameters.isLegacyMapping()) {
if (useKNNMethodContextFromLegacy(builder, parserContext)) {
// Then create KNNMethodContext to be used from the legacy index settings
builder.originalParameters.setResolvedKnnMethodContext(
createKNNMethodContextFromLegacy(parserContext.getSettings(), parserContext.indexVersionCreated(), resolvedSpaceType)
Expand Down Expand Up @@ -550,6 +549,12 @@ private void setEngine(final KNNMethodContext knnMethodContext, KNNEngine knnEng
}
}

static boolean useKNNMethodContextFromLegacy(Builder builder, Mapper.TypeParser.ParserContext parserContext) {
// If the original parameters are from legacy, and it is created on or before 2_17_2 since default is changed to
// FAISS starting 2_18, which doesn't support accepting algo params from index settings
return parserContext.indexVersionCreated().onOrBefore(Version.V_2_17_2) && builder.originalParameters.isLegacyMapping();
}

// We store the version of the index with the mapper as different version of Opensearch has different default
// values of KNN engine Algorithms hyperparameters.
protected Version indexCreatedVersion;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ private Map<Integer, Float> doANNSearch(
spaceType = modelMetadata.getSpaceType();
vectorDataType = modelMetadata.getVectorDataType();
} else {
String engineName = fieldInfo.attributes().getOrDefault(KNN_ENGINE, KNNEngine.NMSLIB.getName());
String engineName = fieldInfo.attributes().getOrDefault(KNN_ENGINE, KNNEngine.DEFAULT.getName());
knnEngine = KNNEngine.getEngine(engineName);
String spaceTypeName = fieldInfo.attributes().getOrDefault(SPACE_TYPE, SpaceType.L2.getValue());
spaceType = SpaceType.getSpace(spaceTypeName);
Expand Down
18 changes: 18 additions & 0 deletions src/test/java/org/opensearch/knn/KNNSingleNodeTestCase.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import org.opensearch.cluster.service.ClusterService;
import org.opensearch.common.xcontent.XContentHelper;
import org.opensearch.knn.index.KNNSettings;
import org.opensearch.knn.index.engine.KNNEngine;
import org.opensearch.knn.index.query.KNNQueryBuilder;
import org.opensearch.knn.index.memory.NativeMemoryCacheManager;
import org.opensearch.knn.index.memory.NativeMemoryLoadStrategy;
Expand Down Expand Up @@ -50,6 +51,7 @@
import static org.mockito.Mockito.when;
import static org.opensearch.knn.common.KNNConstants.DIMENSION;
import static org.opensearch.knn.common.KNNConstants.KNN_ENGINE;
import static org.opensearch.knn.common.KNNConstants.METHOD_HNSW;
import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_SPACE_TYPE;
import static org.opensearch.knn.common.KNNConstants.MODEL_BLOB_PARAMETER;
import static org.opensearch.knn.common.KNNConstants.MODEL_DESCRIPTION;
Expand Down Expand Up @@ -109,6 +111,22 @@ protected void createKnnIndexMapping(String indexName, String fieldName, Integer
/**
* Create simple k-NN mapping with engine
*/
protected void createKnnIndexMapping(String indexName, String fieldName, Integer dimensions, KNNEngine engine) throws IOException {
PutMappingRequest request = new PutMappingRequest(indexName);
XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("properties");
xContentBuilder.startObject(fieldName);
xContentBuilder.field("type", "knn_vector").field("dimension", dimensions.toString());
xContentBuilder.startObject("method");
xContentBuilder.field("name", METHOD_HNSW);
xContentBuilder.field(KNN_ENGINE, engine.getName());
xContentBuilder.endObject();
xContentBuilder.endObject();
xContentBuilder.endObject();
xContentBuilder.endObject();
request.source(xContentBuilder);
OpenSearchAssertions.assertAcked(client().admin().indices().putMapping(request).actionGet());
}

protected void updateIndexSetting(String indexName, Settings setting) {
UpdateSettingsRequest request = new UpdateSettingsRequest(setting, indexName);
OpenSearchAssertions.assertAcked(client().admin().indices().updateSettings(request).actionGet());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.opensearch.index.IndexService;
import org.opensearch.index.engine.Engine;
import org.opensearch.index.shard.IndexShard;
import org.opensearch.knn.index.engine.KNNEngine;
import org.opensearch.knn.index.memory.NativeMemoryCacheManager;

import java.io.IOException;
Expand Down Expand Up @@ -108,7 +109,7 @@ public void testWarmup_shardNotPresentInCache() throws InterruptedException, Exe

public void testGetAllEngineFileContexts() throws IOException, ExecutionException, InterruptedException {
IndexService indexService = createKNNIndex(testIndexName);
createKnnIndexMapping(testIndexName, testFieldName, dimensions);
createKnnIndexMapping(testIndexName, testFieldName, dimensions, KNNEngine.NMSLIB);
updateIndexSetting(testIndexName, Settings.builder().put(KNNSettings.INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD, 0).build());

IndexShard indexShard = indexService.iterator().next();
Expand Down
5 changes: 5 additions & 0 deletions src/test/java/org/opensearch/knn/index/NmslibIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import java.util.TreeMap;

import static org.hamcrest.Matchers.containsString;
import static org.opensearch.knn.common.KNNConstants.KNN_ENGINE;

public class NmslibIT extends KNNRestTestCase {

Expand Down Expand Up @@ -281,6 +282,7 @@ public void testCreateIndexWithValidAlgoParams_mapping() {
.field("dimension", 2)
.startObject(KNNConstants.KNN_METHOD)
.field(KNNConstants.METHOD_PARAMETER_SPACE_TYPE, spaceType)
.field(KNN_ENGINE, KNNEngine.NMSLIB.getName())
.field(KNNConstants.NAME, KNNConstants.METHOD_HNSW)
.startObject(KNNConstants.PARAMETERS)
.field(KNNConstants.METHOD_PARAMETER_EF_CONSTRUCTION, efConstruction)
Expand Down Expand Up @@ -336,6 +338,7 @@ public void testCreateIndexWithValidAlgoParams_mappingAndSettings() {
.field("dimension", 2)
.startObject(KNNConstants.KNN_METHOD)
.field(KNNConstants.METHOD_PARAMETER_SPACE_TYPE, spaceType1)
.field(KNN_ENGINE, KNNEngine.NMSLIB.getName())
.field(KNNConstants.NAME, KNNConstants.METHOD_HNSW)
.startObject(KNNConstants.PARAMETERS)
.field(KNNConstants.METHOD_PARAMETER_EF_CONSTRUCTION, efConstruction1)
Expand Down Expand Up @@ -363,6 +366,7 @@ public void testCreateIndexWithValidAlgoParams_mappingAndSettings() {
.field("dimension", 2)
.startObject(KNNConstants.KNN_METHOD)
.field(KNNConstants.METHOD_PARAMETER_SPACE_TYPE, spaceType1)
.field(KNN_ENGINE, KNNEngine.NMSLIB.getName())
.field(KNNConstants.NAME, KNNConstants.METHOD_HNSW)
.startObject(KNNConstants.PARAMETERS)
.field(KNNConstants.METHOD_PARAMETER_EF_CONSTRUCTION, efConstruction1)
Expand All @@ -375,6 +379,7 @@ public void testCreateIndexWithValidAlgoParams_mappingAndSettings() {
.field("dimension", 2)
.startObject(KNNConstants.KNN_METHOD)
.field(KNNConstants.METHOD_PARAMETER_SPACE_TYPE, spaceType2)
.field(KNN_ENGINE, KNNEngine.NMSLIB.getName())
.field(KNNConstants.NAME, KNNConstants.METHOD_HNSW)
.startObject(KNNConstants.PARAMETERS)
.field(KNNConstants.METHOD_PARAMETER_EF_CONSTRUCTION, efConstruction2)
Expand Down
23 changes: 0 additions & 23 deletions src/test/java/org/opensearch/knn/index/VectorDataTypeIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -263,29 +263,6 @@ public void testByteVectorDataTypeWithNmslibEngine() {
assertTrue(ex.getMessage().contains("is not supported for vector data type"));
}

@SneakyThrows
public void testByteVectorDataTypeWithLegacyFieldMapperKnnIndexSetting() {
// Create an index with byte vector data_type and index.knn as true without setting KnnMethodContext,
// which should throw an exception because the LegacyFieldMapper will use NMSLIB engine and byte data_type
// is not supported for NMSLIB engine.
XContentBuilder builder = XContentFactory.jsonBuilder()
.startObject()
.startObject(PROPERTIES_FIELD)
.startObject(FIELD_NAME)
.field(TYPE_FIELD_NAME, KNN_VECTOR_TYPE)
.field(DIMENSION, 2)
.field(VECTOR_DATA_TYPE_FIELD, VectorDataType.BYTE.getValue())
.endObject()
.endObject()
.endObject();

String mapping = builder.toString();

ResponseException ex = expectThrows(ResponseException.class, () -> createKnnIndex(INDEX_NAME, mapping));
assertTrue(ex.getMessage(), ex.getMessage().contains("is not supported for vector data type"));

}

public void testDocValuesWithByteVectorDataTypeLuceneEngine() throws Exception {
createKnnIndexMappingWithLuceneEngine(2, SpaceType.L2, VectorDataType.BYTE.getValue());
ingestL2ByteTestData();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ public void testAddKNNBinaryField_fromScratch_nmslibLegacy() throws IOException
.addAttribute(KNNConstants.HNSW_ALGO_EF_CONSTRUCTION, "512")
.addAttribute(KNNConstants.HNSW_ALGO_M, "16")
.addAttribute(KNNConstants.SPACE_TYPE, spaceType.getValue())
.addAttribute(KNNConstants.KNN_ENGINE, knnEngine.getName())
.build() };

FieldInfos fieldInfos = new FieldInfos(fieldInfoArray);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ public class KNNCodecTestCase extends KNNTestCase {
.vectorDataType(VectorDataType.DEFAULT)
.build();
KNNMethodContext knnMethodContext = new KNNMethodContext(
KNNEngine.DEFAULT,
KNNEngine.NMSLIB,
SpaceType.DEFAULT,
new MethodComponentContext(METHOD_HNSW, ImmutableMap.of(METHOD_PARAMETER_M, 16, METHOD_PARAMETER_EF_CONSTRUCTION, 512))
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ public void testResolveEngine_whenModeAndCompressionAreFalse_thenDefault() {
);
}

public void testResolveEngine_whenModeSpecifiedAndCompressionIsNotSpecified_thenDefault() {
public void testResolveEngine_whenModeSpecifiedAndCompressionIsNotSpecified_thenNMSLIB() {
assertEquals(KNNEngine.DEFAULT, ENGINE_RESOLVER.resolveEngine(KNNMethodConfigContext.builder().build(), null, false));
assertEquals(
KNNEngine.DEFAULT,
KNNEngine.NMSLIB,
ENGINE_RESOLVER.resolveEngine(
KNNMethodConfigContext.builder().mode(Mode.IN_MEMORY).build(),
new KNNMethodContext(KNNEngine.DEFAULT, SpaceType.UNDEFINED, MethodComponentContext.EMPTY, false),
Expand All @@ -63,7 +63,7 @@ public void testResolveEngine_whenCompressionIs1x_thenEngineBasedOnMode() {
)
);
assertEquals(
KNNEngine.DEFAULT,
KNNEngine.NMSLIB,
ENGINE_RESOLVER.resolveEngine(KNNMethodConfigContext.builder().compressionLevel(CompressionLevel.x1).build(), null, false)
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ public void testDelegateLibraryFunctions() {
assertEquals(Lucene.INSTANCE.getVersion(), KNNEngine.LUCENE.getVersion());
}

public void testGetDefaultEngine_thenReturnFAISS() {
assertEquals(KNNEngine.FAISS, KNNEngine.DEFAULT);
}

/**
* Test name getter
*/
Expand Down
Loading

0 comments on commit 1873211

Please sign in to comment.