Skip to content

Commit

Permalink
Fix match_phrase_prefix_query not working on text field with multiple…
Browse files Browse the repository at this point in the history
… values and index_prefixes (opensearch-project#10959)

* Fix match_phrase_prefix_query not working on text field with multiple values and index_prefixes
Signed-off-by: Gao Binlong <[email protected]>

* Add more test

Signed-off-by: Gao Binlong <[email protected]>

* modify change log

Signed-off-by: Gao Binlong <[email protected]>

* Fix test failure

Signed-off-by: Gao Binlong <[email protected]>

* Change the indexAnalyzer used by prefix field

Signed-off-by: Gao Binlong <[email protected]>

* Skip old version for yaml test

Signed-off-by: Gao Binlong <[email protected]>

* Optimize some code

Signed-off-by: Gao Binlong <[email protected]>

* Fix test failure

Signed-off-by: Gao Binlong <[email protected]>

* Modify yaml test description

Signed-off-by: Gao Binlong <[email protected]>

* Remove the name parameter for setAnalyzer()

Signed-off-by: Gao Binlong <[email protected]>

---------

Signed-off-by: Gao Binlong <[email protected]>
  • Loading branch information
gaobinlong authored and harshavamsi committed Jul 12, 2024
1 parent c93f3f3 commit e4c07c5
Show file tree
Hide file tree
Showing 5 changed files with 124 additions and 9 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Fix bug in SBP cancellation logic ([#13259](https://github.com/opensearch-project/OpenSearch/pull/13474))
- Fix handling of Short and Byte data types in ScriptProcessor ingest pipeline ([#14379](https://github.com/opensearch-project/OpenSearch/issues/14379))
- Switch to iterative version of WKT format parser ([#14086](https://github.com/opensearch-project/OpenSearch/pull/14086))
- Fix match_phrase_prefix_query not working on text field with multiple values and index_prefixes ([#10959](https://github.com/opensearch-project/OpenSearch/pull/10959))
- Fix the computed max shards of cluster to avoid int overflow ([#14155](https://github.com/opensearch-project/OpenSearch/pull/14155))
- Fixed rest-high-level client searchTemplate & mtermVectors endpoints to have a leading slash ([#14465](https://github.com/opensearch-project/OpenSearch/pull/14465))
- Write shard level metadata blob when snapshotting searchable snapshot indexes ([#13190](https://github.com/opensearch-project/OpenSearch/pull/13190))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,12 @@ setup:
index_prefixes:
min_chars: 2
max_chars: 5

text_with_pos_inc_gap:
type: text
position_increment_gap: 201
index_prefixes:
min_chars: 2
max_chars: 5
- do:
index:
index: test
Expand All @@ -23,6 +28,18 @@ setup:
id: 2
body: { text: sentence with UPPERCASE WORDS }

- do:
index:
index: test
id: 3
body: { text: ["foo", "b-12"] }

- do:
index:
index: test
id: 4
body: { text_with_pos_inc_gap: ["foo", "b-12"] }

- do:
indices.refresh:
index: [test]
Expand Down Expand Up @@ -116,3 +133,36 @@ setup:
]

- match: {hits.total: 1}

# related issue: https://github.com/opensearch-project/OpenSearch/issues/9203
---
"search index prefixes with multiple values":
- skip:
version: " - 2.99.99"
reason: "the bug was fixed in 3.0.0"
- do:
search:
rest_total_hits_as_int: true
index: test
body:
query:
match_phrase_prefix:
text: "b-12"

- match: {hits.total: 1}

---
"search index prefixes with multiple values and custom position_increment_gap":
- skip:
version: " - 2.99.99"
reason: "the bug was fixed in 3.0.0"
- do:
search:
rest_total_hits_as_int: true
index: test
body:
query:
match_phrase_prefix:
text_with_pos_inc_gap: "b-12"

- match: {hits.total: 1}
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,6 @@ protected PrefixFieldMapper buildPrefixMapper(BuilderContext context, FieldType
pft.setStoreTermVectorOffsets(true);
}
PrefixFieldType prefixFieldType = new PrefixFieldType(tft, fullName + "._index_prefix", indexPrefixes.get());
prefixFieldType.setAnalyzer(analyzers.getIndexAnalyzer());
tft.setPrefixFieldType(prefixFieldType);
return new PrefixFieldMapper(pft, prefixFieldType);
}
Expand Down Expand Up @@ -522,19 +521,26 @@ private static class PrefixWrappedAnalyzer extends AnalyzerWrapper {
private final int minChars;
private final int maxChars;
private final Analyzer delegate;
private final int positionIncrementGap;

PrefixWrappedAnalyzer(Analyzer delegate, int minChars, int maxChars) {
PrefixWrappedAnalyzer(Analyzer delegate, int minChars, int maxChars, int positionIncrementGap) {
super(delegate.getReuseStrategy());
this.delegate = delegate;
this.minChars = minChars;
this.maxChars = maxChars;
this.positionIncrementGap = positionIncrementGap;
}

@Override
protected Analyzer getWrappedAnalyzer(String fieldName) {
return delegate;
}

@Override
public int getPositionIncrementGap(String fieldName) {
return positionIncrementGap;
}

@Override
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
TokenFilter filter = new EdgeNGramTokenFilter(components.getTokenStream(), minChars, maxChars, false);
Expand Down Expand Up @@ -588,17 +594,18 @@ static final class PrefixFieldType extends StringFieldType {

final int minChars;
final int maxChars;
final TextFieldType parentField;
final TextFieldType parent;

PrefixFieldType(TextFieldType parentField, String name, PrefixConfig config) {
this(parentField, name, config.minChars, config.maxChars);
}

PrefixFieldType(TextFieldType parentField, String name, int minChars, int maxChars) {
super(name, true, false, false, parentField.getTextSearchInfo(), Collections.emptyMap());
PrefixFieldType(TextFieldType parent, String name, int minChars, int maxChars) {
super(name, true, false, false, parent.getTextSearchInfo(), Collections.emptyMap());
this.minChars = minChars;
this.maxChars = maxChars;
this.parentField = parentField;
this.parent = parent;
setAnalyzer(parent.indexAnalyzer());
}

@Override
Expand All @@ -609,8 +616,13 @@ public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchL
}

void setAnalyzer(NamedAnalyzer delegate) {
String analyzerName = delegate.name();
setIndexAnalyzer(
new NamedAnalyzer(delegate.name(), AnalyzerScope.INDEX, new PrefixWrappedAnalyzer(delegate.analyzer(), minChars, maxChars))
new NamedAnalyzer(
analyzerName,
AnalyzerScope.INDEX,
new PrefixWrappedAnalyzer(delegate.analyzer(), minChars, maxChars, delegate.getPositionIncrementGap(analyzerName))
)
);
}

Expand Down Expand Up @@ -639,7 +651,7 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, bool
Automaton automaton = Operations.concatenate(automata);
AutomatonQuery query = AutomatonQueries.createAutomatonQuery(new Term(name(), value + "*"), automaton, method);
return new BooleanQuery.Builder().add(query, BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term(parentField.name(), value)), BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term(parent.name(), value)), BooleanClause.Occur.SHOULD)
.build();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,57 @@ public void testIndexOptions() throws IOException {
}
}

public void testPositionIncrementGapOnIndexPrefixField() throws IOException {
// test default position_increment_gap
MapperService mapperService = createMapperService(
fieldMapping(b -> b.field("type", "text").field("analyzer", "default").startObject("index_prefixes").endObject())
);
ParsedDocument doc = mapperService.documentMapper().parse(source(b -> b.array("field", new String[] { "a", "b 12" })));

withLuceneIndex(mapperService, iw -> iw.addDocument(doc.rootDoc()), reader -> {
TermsEnum terms = getOnlyLeafReader(reader).terms("field").iterator();
assertTrue(terms.seekExact(new BytesRef("12")));
PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
assertEquals(0, postings.nextDoc());
assertEquals(TextFieldMapper.Defaults.POSITION_INCREMENT_GAP + 2, postings.nextPosition());
});

withLuceneIndex(mapperService, iw -> iw.addDocument(doc.rootDoc()), reader -> {
TermsEnum terms = getOnlyLeafReader(reader).terms("field._index_prefix").iterator();
assertTrue(terms.seekExact(new BytesRef("12")));
PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
assertEquals(0, postings.nextDoc());
assertEquals(TextFieldMapper.Defaults.POSITION_INCREMENT_GAP + 2, postings.nextPosition());
});

// test custom position_increment_gap
final int positionIncrementGap = randomIntBetween(1, 1000);
MapperService mapperService2 = createMapperService(
fieldMapping(
b -> b.field("type", "text")
.field("position_increment_gap", positionIncrementGap)
.field("analyzer", "default")
.startObject("index_prefixes")
.endObject()
)
);
ParsedDocument doc2 = mapperService2.documentMapper().parse(source(b -> b.array("field", new String[] { "a", "b 12" })));
withLuceneIndex(mapperService2, iw -> iw.addDocument(doc2.rootDoc()), reader -> {
TermsEnum terms = getOnlyLeafReader(reader).terms("field").iterator();
assertTrue(terms.seekExact(new BytesRef("12")));
PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
assertEquals(0, postings.nextDoc());
assertEquals(positionIncrementGap + 2, postings.nextPosition());
});
withLuceneIndex(mapperService2, iw -> iw.addDocument(doc2.rootDoc()), reader -> {
TermsEnum terms = getOnlyLeafReader(reader).terms("field._index_prefix").iterator();
assertTrue(terms.seekExact(new BytesRef("12")));
PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
assertEquals(0, postings.nextDoc());
assertEquals(positionIncrementGap + 2, postings.nextPosition());
});
}

public void testDefaultPositionIncrementGap() throws IOException {
MapperService mapperService = createMapperService(fieldMapping(this::minimalMapping));
ParsedDocument doc = mapperService.documentMapper().parse(source(b -> b.array("field", new String[] { "a", "b" })));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ public void testFuzzyQuery() {

public void testIndexPrefixes() {
TextFieldType ft = createFieldType(true);
ft.setIndexAnalyzer(Lucene.STANDARD_ANALYZER);
ft.setPrefixFieldType(new TextFieldMapper.PrefixFieldType(ft, "field._index_prefix", 2, 10));

Query q = ft.prefixQuery("goin", CONSTANT_SCORE_REWRITE, false, randomMockShardContext());
Expand Down

0 comments on commit e4c07c5

Please sign in to comment.