Skip to content

Commit

Permalink
Adding Field caps support for Semantic Text (elastic#111809)
Browse files Browse the repository at this point in the history
* Adding override function of fieldHasValue to exclude field when field is empty

* Updating unit tests for Semantic Search Text mapper

* Adding yaml tests for validating field caps for Semantic Text field

* Update docs/changelog/111809.yaml

* Adding and updating yaml tests and changelog file

* Refactor yaml test
  • Loading branch information
Samiul-TheSoccerFan authored Aug 14, 2024
1 parent 3f280d0 commit fa2e281
Show file tree
Hide file tree
Showing 5 changed files with 189 additions and 2 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/111809.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 111809
summary: Add Field caps support for Semantic Text
area: Mapping
type: enhancement
issues: []
2 changes: 1 addition & 1 deletion x-pack/plugin/inference/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ apply plugin: 'elasticsearch.internal-yaml-rest-test'

// NOTE(review): presumably the REST API specs exposed to this module's YAML REST tests — confirm
// against the build plugin. The two include lines below differ only by the trailing 'field_caps' entry.
restResources {
restApi {
include '_common', 'bulk', 'indices', 'inference', 'index', 'get', 'update', 'reindex', 'search'
include '_common', 'bulk', 'indices', 'inference', 'index', 'get', 'update', 'reindex', 'search', 'field_caps'
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

package org.elasticsearch.xpack.inference.mapper;

import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.join.BitSetProducer;
Expand Down Expand Up @@ -320,7 +321,7 @@ public SemanticTextFieldType(
IndexVersion indexVersionCreated,
Map<String, String> meta
) {
super(name, false, false, false, TextSearchInfo.NONE, meta);
super(name, true, false, false, TextSearchInfo.NONE, meta);
this.inferenceId = inferenceId;
this.modelSettings = modelSettings;
this.inferenceField = inferenceField;
Expand Down Expand Up @@ -383,6 +384,11 @@ public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext
throw new IllegalArgumentException("[semantic_text] fields do not support sorting, scripting or aggregating");
}

/**
 * Reports whether this field has a value, judged by the embeddings sub-field rather than by
 * this field's own name.
 *
 * @param fieldInfos the field infos to look the embeddings sub-field up in
 * @return {@code true} if {@code fieldInfos} contains an entry for the name returned by
 *         {@code getEmbeddingsFieldName(name())}, {@code false} otherwise
 */
@Override
public boolean fieldHasValue(FieldInfos fieldInfos) {
    final String embeddingsFieldName = getEmbeddingsFieldName(name());
    final boolean embeddingsPresent = fieldInfos.fieldInfo(embeddingsFieldName) != null;
    return embeddingsPresent;
}

public QueryBuilder semanticQuery(InferenceResults inferenceResults, float boost, String queryName) {
String nestedFieldPath = getChunksFieldName(name());
String inferenceResultsFieldName = getEmbeddingsFieldName(name());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
package org.elasticsearch.xpack.inference.mapper;

import org.apache.lucene.document.FeatureField;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
Expand Down Expand Up @@ -63,6 +65,7 @@
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.BiConsumer;

Expand Down Expand Up @@ -130,6 +133,25 @@ protected IngestScriptSupport ingestScriptSupport() {
throw new AssumptionViolatedException("not supported");
}

/**
 * Builds the field type instance used by the inherited field-type tests: a
 * {@code SemanticTextFieldType} named {@code "field"} with a placeholder inference id,
 * {@code null} for the two middle constructor arguments, the current index version and empty metadata.
 *
 * @return a freshly constructed {@code SemanticTextFieldType}
 */
@Override
public MappedFieldType getMappedFieldType() {
    final String fieldName = "field";
    final String inferenceId = "fake-inference-id";
    final IndexVersion createdVersion = IndexVersion.current();
    final Map<String, String> emptyMeta = Map.of();
    return new SemanticTextFieldMapper.SemanticTextFieldType(fieldName, inferenceId, null, null, createdVersion, emptyMeta);
}

/**
 * Asserts that the given field type is a {@code SemanticTextFieldType} and that it reports
 * itself as both indexed and searchable.
 *
 * @param fieldType the field type under test
 */
@Override
protected void assertSearchable(MappedFieldType fieldType) {
    final Class<?> expectedType = SemanticTextFieldMapper.SemanticTextFieldType.class;
    assertThat(fieldType, instanceOf(expectedType));

    final boolean indexed = fieldType.isIndexed();
    assertTrue(indexed);

    final boolean searchable = fieldType.isSearchable();
    assertTrue(searchable);
}

public void testDefaults() throws Exception {
DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping));
assertEquals(Strings.toString(fieldMapping(this::minimalMapping)), mapper.mappingSource().toString());
Expand All @@ -141,6 +163,13 @@ public void testDefaults() throws Exception {
assertTrue(fields.isEmpty());
}

/**
 * Checks that {@code fieldHasValue} returns {@code true} when the supplied {@code FieldInfos}
 * contains an entry named after the embeddings sub-field of {@code "field"}.
 */
@Override
public void testFieldHasValue() {
    final MappedFieldType semanticTextType = getMappedFieldType();

    // The entry is registered under the embeddings sub-field name, not under "field" itself.
    final String embeddingsFieldName = getEmbeddingsFieldName("field");
    final FieldInfo embeddingsFieldInfo = getFieldInfoWithName(embeddingsFieldName);
    final FieldInfos infos = new FieldInfos(new FieldInfo[] { embeddingsFieldInfo });

    assertTrue(semanticTextType.fieldHasValue(infos));
}

public void testInferenceIdNotPresent() {
Exception e = expectThrows(
MapperParsingException.class,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,82 @@ setup:
- match: { "test-index.mappings.properties.sparse_field.model_settings.task_type": sparse_embedding }
- length: { "test-index.mappings.properties.sparse_field": 3 }

---
# Checks the field_caps response for sparse_field and dense_field on test-index, before and
# after a document carrying inline inference results is indexed into sparse_field.
"Field caps with sparse embedding":

- requires:
cluster_features: "gte_v8.16.0"
reason: field_caps support for semantic_text added in 8.16.0

# Before indexing, empty fields included: both fields are expected in the response.
- do:
field_caps:
include_empty_fields: true
index: test-index
fields: "*"

- match: { indices: [ "test-index" ] }
- exists: fields.sparse_field
- exists: fields.dense_field

# Before indexing, empty fields excluded: neither field is expected in the response.
- do:
field_caps:
include_empty_fields: false
index: test-index
fields: "*"

- match: { indices: [ "test-index" ] }
- not_exists: fields.sparse_field
- not_exists: fields.dense_field

# Index doc_1 with a sparse_field value whose body supplies the text, the inference metadata
# and two chunks with their embeddings; refresh so the following requests see it.
- do:
index:
index: test-index
id: doc_1
body:
sparse_field:
text: "these are not the droids you're looking for. He's free to go around"
inference:
inference_id: sparse-inference-id
model_settings:
task_type: sparse_embedding
chunks:
- text: "these are not the droids you're looking for"
embeddings:
feature_0: 1.0
feature_1: 2.0
feature_2: 3.0
feature_3: 4.0
- text: "He's free to go around"
embeddings:
feature_4: 0.1
feature_5: 0.2
feature_6: 0.3
feature_7: 0.4
refresh: true

# After indexing, empty fields included: both fields are expected, each reported as searchable
# under the semantic_text type.
- do:
field_caps:
include_empty_fields: true
index: test-index
fields: "*"

- match: { indices: [ "test-index" ] }
- exists: fields.sparse_field
- exists: fields.dense_field
- match: { fields.sparse_field.semantic_text.searchable: true }
- match: { fields.dense_field.semantic_text.searchable: true }

# After indexing, empty fields excluded: only sparse_field (the field that received a value)
# is expected; dense_field is still expected to be absent.
- do:
field_caps:
include_empty_fields: false
index: test-index
fields: "*"

- match: { indices: [ "test-index" ] }
- exists: fields.sparse_field
- not_exists: fields.dense_field
- match: { fields.sparse_field.semantic_text.searchable: true }

---
"Indexes dense vector document":

Expand Down Expand Up @@ -105,6 +181,77 @@ setup:
- match: { "test-index.mappings.properties.dense_field.model_settings.task_type": text_embedding }
- length: { "test-index.mappings.properties.dense_field": 3 }

---
# Checks the field_caps response for sparse_field and dense_field on test-index, before and
# after a document carrying inline inference results is indexed into dense_field.
"Field caps with text embedding":

- requires:
cluster_features: "gte_v8.16.0"
reason: field_caps support for semantic_text added in 8.16.0

# Before indexing, empty fields included: both fields are expected in the response.
- do:
field_caps:
include_empty_fields: true
index: test-index
fields: "*"

- match: { indices: [ "test-index" ] }
- exists: fields.sparse_field
- exists: fields.dense_field

# Before indexing, empty fields excluded: neither field is expected in the response.
- do:
field_caps:
include_empty_fields: false
index: test-index
fields: "*"

- match: { indices: [ "test-index" ] }
- not_exists: fields.sparse_field
- not_exists: fields.dense_field

# Index doc_2 with a dense_field value whose body supplies the text, the inference metadata
# (4 dimensions, cosine similarity, float elements) and two chunks with 4-element embeddings;
# refresh so the following requests see it.
- do:
index:
index: test-index
id: doc_2
body:
dense_field:
text: "these are not the droids you're looking for. He's free to go around"
inference:
inference_id: dense-inference-id
model_settings:
task_type: text_embedding
dimensions: 4
similarity: cosine
element_type: float
chunks:
- text: "these are not the droids you're looking for"
embeddings: [ 0.04673296958208084, -0.03237321600317955, -0.02543032355606556, 0.056035321205854416 ]
- text: "He's free to go around"
embeddings: [ 0.00641461368650198, -0.0016253676731139421, -0.05126338079571724, 0.053438711911439896 ]
refresh: true

# After indexing, empty fields included: both fields are expected, each reported as searchable
# under the semantic_text type.
- do:
field_caps:
include_empty_fields: true
index: test-index
fields: "*"

- match: { indices: [ "test-index" ] }
- exists: fields.sparse_field
- exists: fields.dense_field
- match: { fields.sparse_field.semantic_text.searchable: true }
- match: { fields.dense_field.semantic_text.searchable: true }

# After indexing, empty fields excluded: only dense_field (the field that received a value)
# is expected; sparse_field is still expected to be absent.
- do:
field_caps:
include_empty_fields: false
index: test-index
fields: "*"

- match: { indices: [ "test-index" ] }
- not_exists: fields.sparse_field
- exists: fields.dense_field
- match: { fields.dense_field.semantic_text.searchable: true }

---
"Can't be used as a multifield":

Expand Down

0 comments on commit fa2e281

Please sign in to comment.