Skip to content

Commit

Permalink
CompletionFieldMapper should use index name instead of full name
Browse files Browse the repository at this point in the history
  • Loading branch information
imotov committed Sep 12, 2013
1 parent 507b6a6 commit 714aaa4
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 46 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ public Builder preservePositionIncrements(boolean preservePositionIncrements) {
this.preservePositionIncrements = preservePositionIncrements;
return this;
}

public Builder maxInputLength(int maxInputLength) {
if (maxInputLength <= 0) {
throw new ElasticSearchIllegalArgumentException(Fields.MAX_INPUT_LENGTH + " must be > 0 but was [" + maxInputLength + "]");
Expand Down Expand Up @@ -175,7 +175,7 @@ public static class TypeParser implements Mapper.TypeParser {
private NamedAnalyzer getNamedAnalyzer(ParserContext parserContext, String name) {
NamedAnalyzer analyzer = parserContext.analysisService().analyzer(name);
if (analyzer == null) {
throw new ElasticSearchIllegalArgumentException("Can't find default or mapped analyzer with name [" + name +"]");
throw new ElasticSearchIllegalArgumentException("Can't find default or mapped analyzer with name [" + name + "]");
}
return analyzer;
}
Expand Down Expand Up @@ -280,21 +280,21 @@ public Field getCompletionField(String input, BytesRef payload) {
final String originalInput = input;
if (input.length() > maxInputLength) {
final int len = correctSubStringLen(input, Math.min(maxInputLength, input.length()));
input = input.substring(0, len);
input = input.substring(0, len);
}
for (int i = 0; i < input.length(); i++) {
if (isReservedChar(input.charAt(i))) {
throw new ElasticSearchIllegalArgumentException("Illegal input [" + originalInput + "] UTF-16 codepoint [0x"
+ Integer.toHexString((int) input.charAt(i)).toUpperCase(Locale.ROOT)
+ Integer.toHexString((int) input.charAt(i)).toUpperCase(Locale.ROOT)
+ "] at position " + i + " is a reserved character");
}
}
return new SuggestField(names().fullName(), input, this.fieldType, payload, analyzingSuggestLookupProvider);
return new SuggestField(names.indexName(), input, this.fieldType, payload, analyzingSuggestLookupProvider);
}

public static int correctSubStringLen(String input, int len) {
if (Character.isHighSurrogate(input.charAt(len-1))) {
assert input.length() >= len+1 && Character.isLowSurrogate(input.charAt(len));
if (Character.isHighSurrogate(input.charAt(len - 1))) {
assert input.length() >= len + 1 && Character.isLowSurrogate(input.charAt(len));
return len + 1;
}
return len;
Expand All @@ -308,7 +308,7 @@ public BytesRef buildPayload(BytesRef surfaceForm, long weight, BytesRef payload
private static final class SuggestField extends Field {
private final BytesRef payload;
private final CompletionTokenStream.ToFiniteStrings toFiniteStrings;

public SuggestField(String name, Reader value, FieldType type, BytesRef payload, CompletionTokenStream.ToFiniteStrings toFiniteStrings) {
super(name, value, type);
this.payload = payload;
Expand Down Expand Up @@ -342,7 +342,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
builder.field(Fields.PAYLOADS, this.payloads);
builder.field(Fields.PRESERVE_SEPARATORS, this.preserveSeparators);
builder.field(Fields.PRESERVE_POSITION_INCREMENTS, this.preservePositionIncrements);
builder.field(Fields.MAX_INPUT_LENGTH, this.maxInputLength);
builder.field(Fields.MAX_INPUT_LENGTH, this.maxInputLength);
return builder.endObject();
}

Expand Down Expand Up @@ -379,27 +379,27 @@ public String value(Object value) {
public boolean isStoringPayloads() {
return payloads;
}

@Override
public void merge(Mapper mergeWith, MergeContext mergeContext) throws MergeMappingException {
super.merge(mergeWith, mergeContext);
CompletionFieldMapper fieldMergeWith = (CompletionFieldMapper) mergeWith;
if (payloads != fieldMergeWith.payloads) {
mergeContext.addConflict("mapper [" + names.fullName() + "] has different payload values");
}
if (preservePositionIncrements != fieldMergeWith.preservePositionIncrements) {
if (preservePositionIncrements != fieldMergeWith.preservePositionIncrements) {
mergeContext.addConflict("mapper [" + names.fullName() + "] has different 'preserve_position_increments' values");
}
if (preserveSeparators != fieldMergeWith.preserveSeparators) {
if (preserveSeparators != fieldMergeWith.preserveSeparators) {
mergeContext.addConflict("mapper [" + names.fullName() + "] has different 'preserve_separators' values");
}
if (!mergeContext.mergeFlags().simulate()) {
this.maxInputLength = fieldMergeWith.maxInputLength;
}
}

private static final char END_LABEL = 0x00;

// this should be package private but our tests don't allow it.
public static boolean isReservedChar(char character) {
/* we also use 0xFF as a SEP_LABEL in the suggester but it's not valid UTF-8 so no need to check.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,11 @@
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.*;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PairOutputs;
import org.apache.lucene.util.fst.PairOutputs.Pair;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.CompletionLookupProvider;
Expand All @@ -50,10 +53,10 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider

private static final int MAX_SURFACE_FORMS_PER_ANALYZED_FORM = 256;
private static final int MAX_GRAPH_EXPANSIONS = -1;

public static final String CODEC_NAME = "analyzing";
public static final int CODEC_VERSION = 1;

private boolean preserveSep;
private boolean preservePositionIncrements;
private int maxSurfaceFormsPerAnalyzedForm;
Expand Down Expand Up @@ -84,6 +87,7 @@ public FieldsConsumer consumer(final IndexOutput output) throws IOException {
CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION);
return new FieldsConsumer() {
private Map<FieldInfo, Long> fieldOffsets = new HashMap<FieldInfo, Long>();

@Override
public void close() throws IOException {
try { /*
Expand Down Expand Up @@ -179,7 +183,7 @@ public void addPosition(int position, BytesRef payload, int startOffset, int end
analyzingSuggestLookupProvider.parsePayload(payload, spare);
builder.addSurface(spare.surfaceForm, spare.payload, spare.weight);
// multi fields have the same surface form so we sum up here
maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position+1);
maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position + 1);
}

@Override
Expand All @@ -189,7 +193,9 @@ public void finishDoc() throws IOException {
public int getMaxAnalyzedPathsForOneInput() {
return maxAnalyzedPathsForOneInput;
}
};
}

;


@Override
Expand Down Expand Up @@ -225,11 +231,11 @@ public LookupFactory load(IndexInput input) throws IOException {
return new LookupFactory() {
@Override
public Lookup getLookup(FieldMapper<?> mapper, CompletionSuggestionContext suggestionContext) {
AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(mapper.names().fullName());
AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(mapper.names().indexName());
if (analyzingSuggestHolder == null) {
return null;
}
int flags = analyzingSuggestHolder.preserveSep? XAnalyzingSuggester.PRESERVE_SEP : 0;
int flags = analyzingSuggestHolder.preserveSep ? XAnalyzingSuggester.PRESERVE_SEP : 0;

XAnalyzingSuggester suggester;
if (suggestionContext.isFuzzy()) {
Expand All @@ -251,14 +257,14 @@ public Lookup getLookup(FieldMapper<?> mapper, CompletionSuggestionContext sugge
}

@Override
public CompletionStats stats(String ... fields) {
public CompletionStats stats(String... fields) {
long sizeInBytes = 0;
TObjectLongHashMap<String> completionFields = null;
if (fields != null && fields.length > 0) {
if (fields != null && fields.length > 0) {
completionFields = new TObjectLongHashMap<String>(fields.length);
}

for (Map.Entry<String, AnalyzingSuggestHolder> entry: lookupMap.entrySet()) {
for (Map.Entry<String, AnalyzingSuggestHolder> entry : lookupMap.entrySet()) {
sizeInBytes += entry.getValue().fst.sizeInBytes();
if (fields == null || fields.length == 0) {
continue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import com.carrotsearch.randomizedtesting.generators.RandomStrings;
import com.google.common.collect.Lists;
import org.elasticsearch.AbstractSharedClusterTest;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus;
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingResponse;
import org.elasticsearch.action.admin.indices.optimize.OptimizeResponse;
Expand All @@ -39,7 +40,6 @@
import org.elasticsearch.search.suggest.completion.CompletionSuggestionBuilder;
import org.elasticsearch.search.suggest.completion.CompletionSuggestionFuzzyBuilder;
import org.elasticsearch.test.hamcrest.ElasticsearchAssertions;
import org.elasticsearch.AbstractSharedClusterTest;
import org.junit.Test;

import java.io.IOException;
Expand Down Expand Up @@ -370,6 +370,7 @@ public void testThatUpgradeToMultiFieldWorks() throws Exception {
.startObject(TYPE).startObject("properties")
.startObject(FIELD)
.field("type", "multi_field")
.field("path", "just_name")
.startObject("fields")
.startObject(FIELD).field("type", "string").endObject()
.startObject("suggest").field("type", "completion").field("index_analyzer", "simple").field("search_analyzer", "simple").endObject()
Expand All @@ -381,15 +382,15 @@ public void testThatUpgradeToMultiFieldWorks() throws Exception {
assertThat(putMappingResponse.isAcknowledged(), is(true));

SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion(
new CompletionSuggestionBuilder("suggs").field(FIELD + ".suggest").text("f").size(10)
new CompletionSuggestionBuilder("suggs").field("suggest").text("f").size(10)
).execute().actionGet();
assertSuggestions(suggestResponse, "suggs");

client().prepareIndex(INDEX, TYPE, "1").setRefresh(true).setSource(jsonBuilder().startObject().field(FIELD, "Foo Fighters").endObject()).get();
waitForRelocation(ClusterHealthStatus.GREEN);

SuggestResponse afterReindexingResponse = client().prepareSuggest(INDEX).addSuggestion(
new CompletionSuggestionBuilder("suggs").field(FIELD + ".suggest").text("f").size(10)
new CompletionSuggestionBuilder("suggs").field("suggest").text("f").size(10)
).execute().actionGet();
assertSuggestions(afterReindexingResponse, "suggs", "Foo Fighters");
}
Expand Down Expand Up @@ -684,7 +685,7 @@ private void createData(boolean optimize) throws IOException, InterruptedExcepti
client().admin().indices().prepareOptimize(INDEX).execute().actionGet();
}
}

@Test // see #3555
public void testPrunedSegments() throws IOException {
createIndexAndMappingAndSettings(settingsBuilder().put(SETTING_NUMBER_OF_SHARDS, 1).put(SETTING_NUMBER_OF_REPLICAS, 0), "standard", "standard", false, false, false);
Expand All @@ -696,43 +697,43 @@ public void testPrunedSegments() throws IOException {
).get();
client().prepareIndex(INDEX, TYPE, "2").setSource(jsonBuilder()
.startObject()
.field("somefield", "somevalue")
.field("somefield", "somevalue")
.endObject()
).get(); // we have 2 docs in a segment...
OptimizeResponse actionGet = client().admin().indices().prepareOptimize().setFlush(true).setMaxNumSegments(1).setRefresh(true).execute().actionGet();
assertNoFailures(actionGet);
// update the first one and then merge.. the target segment will have no value in FIELD
client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder()
.startObject()
.field("somefield", "somevalue")
.field("somefield", "somevalue")
.endObject()
).get();
actionGet = client().admin().indices().prepareOptimize().setFlush(true).setMaxNumSegments(1).setRefresh(true).execute().actionGet();
assertNoFailures(actionGet);

assertSuggestions("b");
assertThat(2l, equalTo(client().prepareCount(INDEX).get().getCount()));
for(IndexShardSegments seg : client().admin().indices().prepareSegments().get().getIndices().get(INDEX)) {
for (IndexShardSegments seg : client().admin().indices().prepareSegments().get().getIndices().get(INDEX)) {
ShardSegments[] shards = seg.getShards();
for (ShardSegments shardSegments : shards) {
assertThat(1, equalTo(shardSegments.getSegments().size()));
}
}
}

@Test
public void testMaxFieldLength() throws IOException {
client().admin().indices().prepareCreate(INDEX).get();
int iters = atLeast(10);
for (int i = 0; i < iters; i++) {
int len = between(3, 50);
String str = replaceReservedChars(randomRealisticUnicodeOfCodepointLengthBetween(len+1, atLeast(len + 2)), (char)0x01);
String str = replaceReservedChars(randomRealisticUnicodeOfCodepointLengthBetween(len + 1, atLeast(len + 2)), (char) 0x01);
ElasticsearchAssertions.assertAcked(client().admin().indices().preparePutMapping(INDEX).setType(TYPE).setSource(jsonBuilder().startObject()
.startObject(TYPE).startObject("properties")
.startObject(FIELD)
.field("type", "completion")
.field("max_input_len", len)
// upgrade mapping each time
// upgrade mapping each time
.field("analyzer", "keyword")
.endObject()
.endObject().endObject()
Expand All @@ -748,14 +749,14 @@ public void testMaxFieldLength() throws IOException {
assertSuggestions(str.substring(0, prefixLen), "foobar");
if (len + 1 < str.length()) {
assertSuggestions(str.substring(0, CompletionFieldMapper.correctSubStringLen(str,
len + (Character.isHighSurrogate(str.charAt(len-1)) ? 2 : 1))));
len + (Character.isHighSurrogate(str.charAt(len - 1)) ? 2 : 1))));
}
}
}
}

@Test
// see #3596
public void testVeryLongInput() throws IOException {
public void testVeryLongInput() throws IOException {
client().admin().indices().prepareCreate(INDEX).get();
ElasticsearchAssertions.assertAcked(client().admin().indices().preparePutMapping(INDEX).setType(TYPE).setSource(jsonBuilder().startObject()
.startObject(TYPE).startObject("properties")
Expand All @@ -766,19 +767,19 @@ public void testVeryLongInput() throws IOException {
.endObject()));
ensureYellow();
// can cause stack overflow without the default max_input_len
String longString = replaceReservedChars(randomRealisticUnicodeOfLength(atLeast(5000)), (char)0x01);
String longString = replaceReservedChars(randomRealisticUnicodeOfLength(atLeast(5000)), (char) 0x01);
client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder()
.startObject().startObject(FIELD)
.startArray("input").value(longString).endArray()
.field("output", "foobar")
.endObject().endObject()
).setRefresh(true).get();

}

// see #3648
@Test(expected = MapperParsingException.class)
public void testReservedChars() throws IOException {
public void testReservedChars() throws IOException {
client().admin().indices().prepareCreate(INDEX).get();
ElasticsearchAssertions.assertAcked(client().admin().indices().preparePutMapping(INDEX).setType(TYPE).setSource(jsonBuilder().startObject()
.startObject(TYPE).startObject("properties")
Expand All @@ -789,16 +790,16 @@ public void testReservedChars() throws IOException {
.endObject()));
ensureYellow();
// can cause stack overflow without the default max_input_len
String string = "foo" + (char)0x00 + "bar";
String string = "foo" + (char) 0x00 + "bar";
client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder()
.startObject().startObject(FIELD)
.startArray("input").value(string).endArray()
.field("output", "foobar")
.endObject().endObject()
).setRefresh(true).get();

}

private static String replaceReservedChars(String input, char replacement) {
char[] charArray = input.toCharArray();
for (int i = 0; i < charArray.length; i++) {
Expand Down

0 comments on commit 714aaa4

Please sign in to comment.