From 714aaa40eadf2c392b53efd11fc629adb5cf9014 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Wed, 11 Sep 2013 14:40:07 -0400 Subject: [PATCH] CompletionFieldMapper should use index name instead of full name Fixes #3669 --- .../mapper/core/CompletionFieldMapper.java | 30 ++++++------- .../AnalyzingCompletionLookupProvider.java | 26 ++++++----- .../suggest/CompletionSuggestSearchTests.java | 43 ++++++++++--------- 3 files changed, 53 insertions(+), 46 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java index ce7e4bf40a7b0..7d26e2d934c2e 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java @@ -112,7 +112,7 @@ public Builder preservePositionIncrements(boolean preservePositionIncrements) { this.preservePositionIncrements = preservePositionIncrements; return this; } - + public Builder maxInputLength(int maxInputLength) { if (maxInputLength <= 0) { throw new ElasticSearchIllegalArgumentException(Fields.MAX_INPUT_LENGTH + " must be > 0 but was [" + maxInputLength + "]"); @@ -175,7 +175,7 @@ public static class TypeParser implements Mapper.TypeParser { private NamedAnalyzer getNamedAnalyzer(ParserContext parserContext, String name) { NamedAnalyzer analyzer = parserContext.analysisService().analyzer(name); if (analyzer == null) { - throw new ElasticSearchIllegalArgumentException("Can't find default or mapped analyzer with name [" + name +"]"); + throw new ElasticSearchIllegalArgumentException("Can't find default or mapped analyzer with name [" + name + "]"); } return analyzer; } @@ -280,21 +280,21 @@ public Field getCompletionField(String input, BytesRef payload) { final String originalInput = input; if (input.length() > maxInputLength) { final int len = correctSubStringLen(input, Math.min(maxInputLength, input.length())); - input = input.substring(0, len); + input = input.substring(0, len); } for (int i = 0; i < input.length(); i++) { if (isReservedChar(input.charAt(i))) { throw new ElasticSearchIllegalArgumentException("Illegal input [" + originalInput + "] UTF-16 codepoint [0x" - + Integer.toHexString((int) input.charAt(i)).toUpperCase(Locale.ROOT) + + Integer.toHexString((int) input.charAt(i)).toUpperCase(Locale.ROOT) + "] at position " + i + " is a reserved character"); } } - return new SuggestField(names().fullName(), input, this.fieldType, payload, analyzingSuggestLookupProvider); + return new SuggestField(names.indexName(), input, this.fieldType, payload, analyzingSuggestLookupProvider); } - + public static int correctSubStringLen(String input, int len) { - if (Character.isHighSurrogate(input.charAt(len-1))) { - assert input.length() >= len+1 && Character.isLowSurrogate(input.charAt(len)); + if (Character.isHighSurrogate(input.charAt(len - 1))) { + assert input.length() >= len + 1 && Character.isLowSurrogate(input.charAt(len)); return len + 1; } return len; @@ -308,7 +308,7 @@ public BytesRef buildPayload(BytesRef surfaceForm, long weight, BytesRef payload private static final class SuggestField extends Field { private final BytesRef payload; private final CompletionTokenStream.ToFiniteStrings toFiniteStrings; - + public SuggestField(String name, Reader value, FieldType type, BytesRef payload, CompletionTokenStream.ToFiniteStrings toFiniteStrings) { super(name, value, type); this.payload = payload; @@ -342,7 +342,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(Fields.PAYLOADS, this.payloads); builder.field(Fields.PRESERVE_SEPARATORS, this.preserveSeparators); builder.field(Fields.PRESERVE_POSITION_INCREMENTS, this.preservePositionIncrements); - builder.field(Fields.MAX_INPUT_LENGTH, this.maxInputLength); + builder.field(Fields.MAX_INPUT_LENGTH, this.maxInputLength); return builder.endObject(); } @@ -379,7 +379,7 @@ public String value(Object value) { public boolean isStoringPayloads() { return payloads; } - + @Override public void merge(Mapper mergeWith, MergeContext mergeContext) throws MergeMappingException { super.merge(mergeWith, mergeContext); @@ -387,19 +387,19 @@ public void merge(Mapper mergeWith, MergeContext mergeContext) throws MergeMappi if (payloads != fieldMergeWith.payloads) { mergeContext.addConflict("mapper [" + names.fullName() + "] has different payload values"); } - if (preservePositionIncrements != fieldMergeWith.preservePositionIncrements) { + if (preservePositionIncrements != fieldMergeWith.preservePositionIncrements) { mergeContext.addConflict("mapper [" + names.fullName() + "] has different 'preserve_position_increments' values"); } - if (preserveSeparators != fieldMergeWith.preserveSeparators) { + if (preserveSeparators != fieldMergeWith.preserveSeparators) { mergeContext.addConflict("mapper [" + names.fullName() + "] has different 'preserve_separators' values"); } if (!mergeContext.mergeFlags().simulate()) { this.maxInputLength = fieldMergeWith.maxInputLength; } } - + private static final char END_LABEL = 0x00; - + // this should be package private but our tests don't allow it. public static boolean isReservedChar(char character) { /* we also use 0xFF as a SEP_LABEL in the suggester but it's not valid UTF-8 so no need to check. diff --git a/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java b/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java index 8d966e55c0b3b..57c7d30ada146 100644 --- a/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java +++ b/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java @@ -31,8 +31,11 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IntsRef; -import org.apache.lucene.util.fst.*; +import org.apache.lucene.util.fst.ByteSequenceOutputs; +import org.apache.lucene.util.fst.FST; +import org.apache.lucene.util.fst.PairOutputs; import org.apache.lucene.util.fst.PairOutputs.Pair; +import org.apache.lucene.util.fst.PositiveIntOutputs; import org.elasticsearch.common.regex.Regex; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.CompletionLookupProvider; @@ -50,10 +53,10 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider private static final int MAX_SURFACE_FORMS_PER_ANALYZED_FORM = 256; private static final int MAX_GRAPH_EXPANSIONS = -1; - + public static final String CODEC_NAME = "analyzing"; public static final int CODEC_VERSION = 1; - + private boolean preserveSep; private boolean preservePositionIncrements; private int maxSurfaceFormsPerAnalyzedForm; @@ -84,6 +87,7 @@ public FieldsConsumer consumer(final IndexOutput output) throws IOException { CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION); return new FieldsConsumer() { private Map fieldOffsets = new HashMap(); + @Override public void close() throws IOException { try { /* @@ -179,7 +183,7 @@ public void addPosition(int position, BytesRef payload, int startOffset, int end analyzingSuggestLookupProvider.parsePayload(payload, spare); builder.addSurface(spare.surfaceForm, spare.payload, spare.weight); // multi fields have the same surface form so we sum up here - maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position+1); + maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position + 1); } @Override @@ -189,7 +193,9 @@ public void finishDoc() throws IOException { public int getMaxAnalyzedPathsForOneInput() { return maxAnalyzedPathsForOneInput; } - }; + } + + ; @Override @@ -225,11 +231,11 @@ public LookupFactory load(IndexInput input) throws IOException { return new LookupFactory() { @Override public Lookup getLookup(FieldMapper mapper, CompletionSuggestionContext suggestionContext) { - AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(mapper.names().fullName()); + AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(mapper.names().indexName()); if (analyzingSuggestHolder == null) { return null; } - int flags = analyzingSuggestHolder.preserveSep? XAnalyzingSuggester.PRESERVE_SEP : 0; + int flags = analyzingSuggestHolder.preserveSep ? XAnalyzingSuggester.PRESERVE_SEP : 0; XAnalyzingSuggester suggester; if (suggestionContext.isFuzzy()) { @@ -251,14 +257,14 @@ public Lookup getLookup(FieldMapper mapper, CompletionSuggestionContext sugge } @Override - public CompletionStats stats(String ... fields) { + public CompletionStats stats(String... fields) { long sizeInBytes = 0; TObjectLongHashMap completionFields = null; - if (fields != null && fields.length > 0) { + if (fields != null && fields.length > 0) { completionFields = new TObjectLongHashMap(fields.length); } - for (Map.Entry entry: lookupMap.entrySet()) { + for (Map.Entry entry : lookupMap.entrySet()) { sizeInBytes += entry.getValue().fst.sizeInBytes(); if (fields == null || fields.length == 0) { continue; diff --git a/src/test/java/org/elasticsearch/search/suggest/CompletionSuggestSearchTests.java b/src/test/java/org/elasticsearch/search/suggest/CompletionSuggestSearchTests.java index b2711c7cc67ab..c94eb331c0fa5 100644 --- a/src/test/java/org/elasticsearch/search/suggest/CompletionSuggestSearchTests.java +++ b/src/test/java/org/elasticsearch/search/suggest/CompletionSuggestSearchTests.java @@ -20,6 +20,7 @@ import com.carrotsearch.randomizedtesting.generators.RandomStrings; import com.google.common.collect.Lists; +import org.elasticsearch.AbstractSharedClusterTest; import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus; import org.elasticsearch.action.admin.indices.mapping.put.PutMappingResponse; import org.elasticsearch.action.admin.indices.optimize.OptimizeResponse; @@ -39,7 +40,6 @@ import org.elasticsearch.search.suggest.completion.CompletionSuggestionBuilder; import org.elasticsearch.search.suggest.completion.CompletionSuggestionFuzzyBuilder; import org.elasticsearch.test.hamcrest.ElasticsearchAssertions; -import org.elasticsearch.AbstractSharedClusterTest; import org.junit.Test; import java.io.IOException; @@ -370,6 +370,7 @@ public void testThatUpgradeToMultiFieldWorks() throws Exception { .startObject(TYPE).startObject("properties") .startObject(FIELD) .field("type", "multi_field") + .field("path", "just_name") .startObject("fields") .startObject(FIELD).field("type", "string").endObject() .startObject("suggest").field("type", "completion").field("index_analyzer", "simple").field("search_analyzer", "simple").endObject() @@ -381,7 +382,7 @@ public void testThatUpgradeToMultiFieldWorks() throws Exception { assertThat(putMappingResponse.isAcknowledged(), is(true)); SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion( - new CompletionSuggestionBuilder("suggs").field(FIELD + ".suggest").text("f").size(10) + new CompletionSuggestionBuilder("suggs").field("suggest").text("f").size(10) ).execute().actionGet(); assertSuggestions(suggestResponse, "suggs"); @@ -389,7 +390,7 @@ public void testThatUpgradeToMultiFieldWorks() throws Exception { waitForRelocation(ClusterHealthStatus.GREEN); SuggestResponse afterReindexingResponse = client().prepareSuggest(INDEX).addSuggestion( - new CompletionSuggestionBuilder("suggs").field(FIELD + ".suggest").text("f").size(10) + new CompletionSuggestionBuilder("suggs").field("suggest").text("f").size(10) ).execute().actionGet(); assertSuggestions(afterReindexingResponse, "suggs", "Foo Fighters"); } @@ -684,7 +685,7 @@ private void createData(boolean optimize) throws IOException, InterruptedExcepti client().admin().indices().prepareOptimize(INDEX).execute().actionGet(); } } - + @Test // see #3555 public void testPrunedSegments() throws IOException { createIndexAndMappingAndSettings(settingsBuilder().put(SETTING_NUMBER_OF_SHARDS, 1).put(SETTING_NUMBER_OF_REPLICAS, 0), "standard", "standard", false, false, false); @@ -696,7 +697,7 @@ public void testPrunedSegments() throws IOException { ).get(); client().prepareIndex(INDEX, TYPE, "2").setSource(jsonBuilder() .startObject() - .field("somefield", "somevalue") + .field("somefield", "somevalue") .endObject() ).get(); // we have 2 docs in a segment... OptimizeResponse actionGet = client().admin().indices().prepareOptimize().setFlush(true).setMaxNumSegments(1).setRefresh(true).execute().actionGet(); @@ -704,7 +705,7 @@ public void testPrunedSegments() throws IOException { // update the first one and then merge.. the target segment will have no value in FIELD client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder() .startObject() - .field("somefield", "somevalue") + .field("somefield", "somevalue") .endObject() ).get(); actionGet = client().admin().indices().prepareOptimize().setFlush(true).setMaxNumSegments(1).setRefresh(true).execute().actionGet(); @@ -712,27 +713,27 @@ public void testPrunedSegments() throws IOException { assertSuggestions("b"); assertThat(2l, equalTo(client().prepareCount(INDEX).get().getCount())); - for(IndexShardSegments seg : client().admin().indices().prepareSegments().get().getIndices().get(INDEX)) { + for (IndexShardSegments seg : client().admin().indices().prepareSegments().get().getIndices().get(INDEX)) { ShardSegments[] shards = seg.getShards(); for (ShardSegments shardSegments : shards) { assertThat(1, equalTo(shardSegments.getSegments().size())); } } } - + @Test public void testMaxFieldLength() throws IOException { client().admin().indices().prepareCreate(INDEX).get(); int iters = atLeast(10); for (int i = 0; i < iters; i++) { int len = between(3, 50); - String str = replaceReservedChars(randomRealisticUnicodeOfCodepointLengthBetween(len+1, atLeast(len + 2)), (char)0x01); + String str = replaceReservedChars(randomRealisticUnicodeOfCodepointLengthBetween(len + 1, atLeast(len + 2)), (char) 0x01); ElasticsearchAssertions.assertAcked(client().admin().indices().preparePutMapping(INDEX).setType(TYPE).setSource(jsonBuilder().startObject() .startObject(TYPE).startObject("properties") .startObject(FIELD) .field("type", "completion") .field("max_input_len", len) - // upgrade mapping each time + // upgrade mapping each time .field("analyzer", "keyword") .endObject() .endObject().endObject() @@ -748,14 +749,14 @@ public void testMaxFieldLength() throws IOException { assertSuggestions(str.substring(0, prefixLen), "foobar"); if (len + 1 < str.length()) { assertSuggestions(str.substring(0, CompletionFieldMapper.correctSubStringLen(str, - len + (Character.isHighSurrogate(str.charAt(len-1)) ? 2 : 1)))); + len + (Character.isHighSurrogate(str.charAt(len - 1)) ? 2 : 1)))); } - } + } } - + @Test // see #3596 - public void testVeryLongInput() throws IOException { + public void testVeryLongInput() throws IOException { client().admin().indices().prepareCreate(INDEX).get(); ElasticsearchAssertions.assertAcked(client().admin().indices().preparePutMapping(INDEX).setType(TYPE).setSource(jsonBuilder().startObject() .startObject(TYPE).startObject("properties") @@ -766,19 +767,19 @@ public void testVeryLongInput() throws IOException { .endObject())); ensureYellow(); // can cause stack overflow without the default max_input_len - String longString = replaceReservedChars(randomRealisticUnicodeOfLength(atLeast(5000)), (char)0x01); + String longString = replaceReservedChars(randomRealisticUnicodeOfLength(atLeast(5000)), (char) 0x01); client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder() .startObject().startObject(FIELD) .startArray("input").value(longString).endArray() .field("output", "foobar") .endObject().endObject() ).setRefresh(true).get(); - + } - + // see #3648 @Test(expected = MapperParsingException.class) - public void testReservedChars() throws IOException { + public void testReservedChars() throws IOException { client().admin().indices().prepareCreate(INDEX).get(); ElasticsearchAssertions.assertAcked(client().admin().indices().preparePutMapping(INDEX).setType(TYPE).setSource(jsonBuilder().startObject() .startObject(TYPE).startObject("properties") @@ -789,16 +790,16 @@ public void testReservedChars() throws IOException { .endObject())); ensureYellow(); // can cause stack overflow without the default max_input_len - String string = "foo" + (char)0x00 + "bar"; + String string = "foo" + (char) 0x00 + "bar"; client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder() .startObject().startObject(FIELD) .startArray("input").value(string).endArray() .field("output", "foobar") .endObject().endObject() ).setRefresh(true).get(); - + } - + private static String replaceReservedChars(String input, char replacement) { char[] charArray = input.toCharArray(); for (int i = 0; i < charArray.length; i++) {