Skip to content

Commit

Permalink
Increase Lucene max dimension limit to 16,000 (#1346)
Browse files Browse the repository at this point in the history
* Increase Lucene max dimension limit to 16,000

Signed-off-by: Junqiu Lei <[email protected]>
  • Loading branch information
junqiu-lei authored Dec 13, 2023
1 parent 33da521 commit 083ea2b
Show file tree
Hide file tree
Showing 7 changed files with 25 additions and 9 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Features
* Add parent join support for lucene knn [#1182](https://github.com/opensearch-project/k-NN/pull/1182)
### Enhancements
* Increase Lucene max dimension limit to 16,000 [#1346](https://github.com/opensearch-project/k-NN/pull/1346)
### Bug Fixes
* Fix use-after-free case on nmslib search path [#1305](https://github.com/opensearch-project/k-NN/pull/1305)
* Allow nested knn field mapping when train model [#1318](https://github.com/opensearch-project/k-NN/pull/1318)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@ public KnnVectorsFormat getKnnVectorsFormatForField(final String field) {
return formatSupplier.apply(maxConnections, beamWidth);
}

/**
 * Reports the maximum vector dimension for a field by asking the vectors format
 * that {@link #getKnnVectorsFormatForField(String)} selects for that field.
 *
 * @param fieldName name of the field whose limit is requested
 * @return the limit reported by the format chosen for {@code fieldName}
 */
@Override
public int getMaxDimensions(String fieldName) {
    final KnnVectorsFormat fieldFormat = getKnnVectorsFormatForField(fieldName);
    return fieldFormat.getMaxDimensions(fieldName);
}

/**
 * Tells whether the named field is mapped as a k-NN vector field.
 *
 * @param field name of the field to look up through the mapper service
 * @return {@code true} only when a mapper service is present and the field's type is a
 *         {@link KNNVectorFieldMapper.KNNVectorFieldType}; {@code false} otherwise
 */
private boolean isKnnVectorFieldType(final String field) {
    // Without a mapper service there is nothing to resolve the field against.
    if (!mapperService.isPresent()) {
        return false;
    }
    return mapperService.get().fieldType(field) instanceof KNNVectorFieldMapper.KNNVectorFieldType;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import org.apache.lucene.codecs.lucene95.Lucene95HnswVectorsFormat;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.knn.index.codec.BasePerFieldKnnVectorsFormat;
import org.opensearch.knn.index.util.KNNEngine;

import java.util.Optional;

Expand All @@ -25,4 +26,15 @@ public KNN950PerFieldKnnVectorsFormat(final Optional<MapperService> mapperServic
(maxConnm, beamWidth) -> new Lucene95HnswVectorsFormat(maxConnm, beamWidth)
);
}

/**
 * Returns the maximum vector dimension that {@link KNNEngine} defines for the Lucene engine.
 *
 * @param fieldName name of the field; not used by this implementation
 * @return the value of {@code KNNEngine.getMaxDimensionByEngine(KNNEngine.LUCENE)}
 */
@Override
public int getMaxDimensions(String fieldName) {
    return KNNEngine.getMaxDimensionByEngine(KNNEngine.LUCENE);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import java.util.Locale;
import java.util.Optional;

import org.apache.lucene.codecs.KnnVectorsFormat;
import static org.opensearch.knn.common.KNNConstants.VECTOR_DATA_TYPE_FIELD;
import static org.opensearch.knn.index.mapper.KNNVectorFieldMapperUtil.addStoredFieldForVectorField;
import static org.opensearch.knn.index.mapper.KNNVectorFieldMapperUtil.buildDocValuesFieldType;
Expand All @@ -33,8 +32,6 @@
*/
public class LuceneFieldMapper extends KNNVectorFieldMapper {

private static final int LUCENE_MAX_DIMENSION = KnnVectorsFormat.DEFAULT_MAX_DIMENSIONS;

/** FieldType used for initializing VectorField, which is used for creating binary doc values. **/
private final FieldType vectorFieldType;
private final VectorDataType vectorDataType;
Expand All @@ -55,12 +52,12 @@ public class LuceneFieldMapper extends KNNVectorFieldMapper {
final VectorSimilarityFunction vectorSimilarityFunction = this.knnMethod.getSpaceType().getVectorSimilarityFunction();

final int dimension = input.getMappedFieldType().getDimension();
if (dimension > LUCENE_MAX_DIMENSION) {
if (dimension > KNNEngine.getMaxDimensionByEngine(KNNEngine.LUCENE)) {
throw new IllegalArgumentException(
String.format(
Locale.ROOT,
"Dimension value cannot be greater than [%s] but got [%s] for vector [%s]",
LUCENE_MAX_DIMENSION,
KNNEngine.getMaxDimensionByEngine(KNNEngine.LUCENE),
dimension,
input.getName()
)
Expand Down
3 changes: 1 addition & 2 deletions src/main/java/org/opensearch/knn/index/util/KNNEngine.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
package org.opensearch.knn.index.util;

import com.google.common.collect.ImmutableSet;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.opensearch.common.ValidationException;
import org.opensearch.knn.index.KNNMethod;
import org.opensearch.knn.index.KNNMethodContext;
Expand Down Expand Up @@ -40,7 +39,7 @@ public enum KNNEngine implements KNNLibrary {
KNNEngine.FAISS,
16_000,
KNNEngine.LUCENE,
KnnVectorsFormat.DEFAULT_MAX_DIMENSIONS
16_000
);

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,7 @@ public void testKnnVectorIndex(
writer.close();

verify(perFieldKnnVectorsFormatSpy, atLeastOnce()).getKnnVectorsFormatForField(eq(FIELD_NAME_ONE));
verify(perFieldKnnVectorsFormatSpy, atLeastOnce()).getMaxDimensions(eq(FIELD_NAME_ONE));

IndexSearcher searcher = new IndexSearcher(reader);
Query query = KNNQueryFactory.create(
Expand Down Expand Up @@ -372,6 +373,7 @@ public void testKnnVectorIndex(
NativeMemoryLoadStrategy.IndexLoadStrategy.initialize(resourceWatcherService);

verify(perFieldKnnVectorsFormatSpy, atLeastOnce()).getKnnVectorsFormatForField(eq(FIELD_NAME_TWO));
verify(perFieldKnnVectorsFormatSpy, atLeastOnce()).getMaxDimensions(eq(FIELD_NAME_TWO));

IndexSearcher searcher1 = new IndexSearcher(reader1);
Query query1 = KNNQueryFactory.create(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ public void testTypeParser_parse_fromKnnMethodContext_invalidDimension() throws
XContentBuilder xContentBuilderOverMaxDimension = XContentFactory.jsonBuilder()
.startObject()
.field(TYPE_FIELD_NAME, KNN_VECTOR_TYPE)
.field(DIMENSION_FIELD_NAME, 2000)
.field(DIMENSION_FIELD_NAME, 20000)
.startObject(KNN_METHOD)
.field(NAME, METHOD_HNSW)
.field(METHOD_PARAMETER_SPACE_TYPE, SpaceType.L2)
Expand All @@ -321,7 +321,7 @@ public void testTypeParser_parse_fromKnnMethodContext_invalidDimension() throws
IllegalArgumentException.class,
() -> builderOverMaxDimension.build(new Mapper.BuilderContext(settings, new ContentPath()))
);
assertEquals("Dimension value cannot be greater than 1024 for vector: test-field-name", ex.getMessage());
assertEquals("Dimension value cannot be greater than 16000 for vector: test-field-name", ex.getMessage());

XContentBuilder xContentBuilderInvalidDimension = XContentFactory.jsonBuilder()
.startObject()
Expand Down

0 comments on commit 083ea2b

Please sign in to comment.