diff --git a/docs/changelog/111809.yaml b/docs/changelog/111809.yaml new file mode 100644 index 0000000000000..5a2f220e3a697 --- /dev/null +++ b/docs/changelog/111809.yaml @@ -0,0 +1,5 @@ +pr: 111809 +summary: Add Field caps support for Semantic Text +area: Mapping +type: enhancement +issues: [] diff --git a/x-pack/plugin/inference/build.gradle b/x-pack/plugin/inference/build.gradle index beeec94f21ebf..211b99343340d 100644 --- a/x-pack/plugin/inference/build.gradle +++ b/x-pack/plugin/inference/build.gradle @@ -12,7 +12,7 @@ apply plugin: 'elasticsearch.internal-yaml-rest-test' restResources { restApi { - include '_common', 'bulk', 'indices', 'inference', 'index', 'get', 'update', 'reindex', 'search' + include '_common', 'bulk', 'indices', 'inference', 'index', 'get', 'update', 'reindex', 'search', 'field_caps' } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index b9b95afbf6dc6..a8c3de84572a7 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.inference.mapper; +import org.apache.lucene.index.FieldInfos; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.join.BitSetProducer; @@ -320,7 +321,7 @@ public SemanticTextFieldType( IndexVersion indexVersionCreated, Map meta ) { - super(name, false, false, false, TextSearchInfo.NONE, meta); + super(name, true, false, false, TextSearchInfo.NONE, meta); this.inferenceId = inferenceId; this.modelSettings = modelSettings; this.inferenceField = inferenceField; @@ -383,6 +384,11 @@ public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext throw new IllegalArgumentException("[semantic_text] fields do not support sorting, scripting or aggregating"); } + @Override + public boolean fieldHasValue(FieldInfos fieldInfos) { + return fieldInfos.fieldInfo(getEmbeddingsFieldName(name())) != null; + } + public QueryBuilder semanticQuery(InferenceResults inferenceResults, float boost, String queryName) { String nestedFieldPath = getChunksFieldName(name()); String inferenceResultsFieldName = getEmbeddingsFieldName(name()); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java index 1cae8d981313f..bb0691c691176 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java @@ -8,6 +8,8 @@ package org.elasticsearch.xpack.inference.mapper; import org.apache.lucene.document.FeatureField; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; @@ -63,6 +65,7 @@ import java.util.Collection; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.function.BiConsumer; @@ -130,6 +133,25 @@ protected IngestScriptSupport ingestScriptSupport() { throw new AssumptionViolatedException("not supported"); } + @Override + public MappedFieldType getMappedFieldType() { + return new SemanticTextFieldMapper.SemanticTextFieldType( + "field", + "fake-inference-id", + null, + null, + IndexVersion.current(), + Map.of() + ); + } + + @Override + protected void assertSearchable(MappedFieldType fieldType) { + assertThat(fieldType, instanceOf(SemanticTextFieldMapper.SemanticTextFieldType.class)); + assertTrue(fieldType.isIndexed()); + assertTrue(fieldType.isSearchable()); + } + public void testDefaults() throws Exception { DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); assertEquals(Strings.toString(fieldMapping(this::minimalMapping)), mapper.mappingSource().toString()); @@ -141,6 +163,13 @@ public void testDefaults() throws Exception { assertTrue(fields.isEmpty()); } + @Override + public void testFieldHasValue() { + MappedFieldType fieldType = getMappedFieldType(); + FieldInfos fieldInfos = new FieldInfos(new FieldInfo[] { getFieldInfoWithName(getEmbeddingsFieldName("field")) }); + assertTrue(fieldType.fieldHasValue(fieldInfos)); + } + public void testInferenceIdNotPresent() { Exception e = expectThrows( MapperParsingException.class, diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml index d7f7e21e6f428..3f907ae1de6cd 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml @@ -63,6 +63,82 @@ setup: - match: { "test-index.mappings.properties.sparse_field.model_settings.task_type": sparse_embedding } - length: { "test-index.mappings.properties.sparse_field": 3 } +--- +"Field caps with sparse embedding": + + - requires: + cluster_features: "gte_v8.16.0" + reason: field_caps support for semantic_text added in 8.16.0 + + - do: + field_caps: + include_empty_fields: true + index: test-index + fields: "*" + + - match: { indices: [ "test-index" ] } + - exists: fields.sparse_field + - exists: fields.dense_field + + - do: + field_caps: + include_empty_fields: false + index: test-index + fields: "*" + + - match: { indices: [ "test-index" ] } + - not_exists: fields.sparse_field + - not_exists: fields.dense_field + + - do: + index: + index: test-index + id: doc_1 + body: + sparse_field: + text: "these are not the droids you're looking for. He's free to go around" + inference: + inference_id: sparse-inference-id + model_settings: + task_type: sparse_embedding + chunks: + - text: "these are not the droids you're looking for" + embeddings: + feature_0: 1.0 + feature_1: 2.0 + feature_2: 3.0 + feature_3: 4.0 + - text: "He's free to go around" + embeddings: + feature_4: 0.1 + feature_5: 0.2 + feature_6: 0.3 + feature_7: 0.4 + refresh: true + + - do: + field_caps: + include_empty_fields: true + index: test-index + fields: "*" + + - match: { indices: [ "test-index" ] } + - exists: fields.sparse_field + - exists: fields.dense_field + - match: { fields.sparse_field.semantic_text.searchable: true } + - match: { fields.dense_field.semantic_text.searchable: true } + + - do: + field_caps: + include_empty_fields: false + index: test-index + fields: "*" + + - match: { indices: [ "test-index" ] } + - exists: fields.sparse_field + - not_exists: fields.dense_field + - match: { fields.sparse_field.semantic_text.searchable: true } + --- "Indexes dense vector document": @@ -105,6 +181,77 @@ setup: - match: { "test-index.mappings.properties.dense_field.model_settings.task_type": text_embedding } - length: { "test-index.mappings.properties.dense_field": 3 } +--- +"Field caps with text embedding": + + - requires: + cluster_features: "gte_v8.16.0" + reason: field_caps support for semantic_text added in 8.16.0 + + - do: + field_caps: + include_empty_fields: true + index: test-index + fields: "*" + + - match: { indices: [ "test-index" ] } + - exists: fields.sparse_field + - exists: fields.dense_field + + - do: + field_caps: + include_empty_fields: false + index: test-index + fields: "*" + + - match: { indices: [ "test-index" ] } + - not_exists: fields.sparse_field + - not_exists: fields.dense_field + + - do: + index: + index: test-index + id: doc_2 + body: + dense_field: + text: "these are not the droids you're looking for. He's free to go around" + inference: + inference_id: dense-inference-id + model_settings: + task_type: text_embedding + dimensions: 4 + similarity: cosine + element_type: float + chunks: + - text: "these are not the droids you're looking for" + embeddings: [ 0.04673296958208084, -0.03237321600317955, -0.02543032355606556, 0.056035321205854416 ] + - text: "He's free to go around" + embeddings: [ 0.00641461368650198, -0.0016253676731139421, -0.05126338079571724, 0.053438711911439896 ] + refresh: true + + - do: + field_caps: + include_empty_fields: true + index: test-index + fields: "*" + + - match: { indices: [ "test-index" ] } + - exists: fields.sparse_field + - exists: fields.dense_field + - match: { fields.sparse_field.semantic_text.searchable: true } + - match: { fields.dense_field.semantic_text.searchable: true } + + - do: + field_caps: + include_empty_fields: false + index: test-index + fields: "*" + + - match: { indices: [ "test-index" ] } + - not_exists: fields.sparse_field + - exists: fields.dense_field + - match: { fields.dense_field.semantic_text.searchable: true } + --- "Can't be used as a multifield":