Skip to content

Commit

Permalink
star tree keyword changes
Browse files Browse the repository at this point in the history
Signed-off-by: Bharathwaj G <[email protected]>
  • Loading branch information
bharath-techie committed Oct 3, 2024
1 parent eb43e37 commit 49dd2f5
Show file tree
Hide file tree
Showing 9 changed files with 224 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public class StarTreeMapperIT extends OpenSearchIntegTestCase {
.put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(512, ByteSizeUnit.MB))
.build();

private static XContentBuilder createMinimalTestMapping(boolean invalidDim, boolean invalidMetric, boolean keywordDim) {
private static XContentBuilder createMinimalTestMapping(boolean invalidDim, boolean invalidMetric, boolean ipdim) {
try {
return jsonBuilder().startObject()
.startObject("composite")
Expand All @@ -68,12 +68,15 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
.endObject()
.startArray("ordered_dimensions")
.startObject()
.field("name", getDim(invalidDim, keywordDim))
.field("name", getDim(invalidDim, ipdim))
.endObject()
.startObject()
.field("name", "keyword_dv")
.endObject()
.endArray()
.startArray("metrics")
.startObject()
.field("name", getDim(invalidMetric, false))
.field("name", getMetric(invalidMetric, false))
.endObject()
.endArray()
.endObject()
Expand All @@ -99,6 +102,10 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
.field("type", "keyword")
.field("doc_values", false)
.endObject()
.startObject("ip")
.field("type", "ip")
.field("doc_values", false)
.endObject()
.endObject()
.endObject();
} catch (IOException e) {
Expand Down Expand Up @@ -356,10 +363,19 @@ private XContentBuilder getMappingWithDuplicateFields(boolean isDuplicateDim, bo
}

private static String getDim(boolean hasDocValues, boolean isKeyword) {
if (hasDocValues) {
return random().nextBoolean() ? "numeric" : "keyword";
} else if (isKeyword) {
return "ip";
}
return "numeric_dv";
}

private static String getMetric(boolean hasDocValues, boolean isKeyword) {
if (hasDocValues) {
return "numeric";
} else if (isKeyword) {
return "keyword";
return "ip";
}
return "numeric_dv";
}
Expand Down Expand Up @@ -398,6 +414,7 @@ public void testValidCompositeIndex() {
assertEquals(expectedTimeUnits.get(i).shortName(), dateDim.getSortedCalendarIntervals().get(i).shortName());
}
assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField());
assertEquals("keyword_dv", starTreeFieldType.getDimensions().get(2).getField());
assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField());
List<MetricStat> expectedMetrics = Arrays.asList(MetricStat.VALUE_COUNT, MetricStat.SUM, MetricStat.AVG);
assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics());
Expand Down Expand Up @@ -665,10 +682,7 @@ public void testInvalidDimCompositeIndex() {
IllegalArgumentException.class,
() -> prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(true, false, false)).get()
);
assertEquals(
"Aggregations not supported for the dimension field [numeric] with field type [integer] as part of star tree field",
ex.getMessage()
);
assertTrue(ex.getMessage().startsWith("Aggregations not supported for the dimension field "));
}

public void testMaxDimsCompositeIndex() {
Expand Down Expand Up @@ -734,7 +748,7 @@ public void testUnsupportedDim() {
() -> prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(false, false, true)).get()
);
assertEquals(
"Failed to parse mapping [_doc]: unsupported field type associated with dimension [keyword] as part of star tree field [startree-1]",
"Failed to parse mapping [_doc]: unsupported field type associated with dimension [ip] as part of star tree field [startree-1]",
ex.getMessage()
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.IndexOutput;
import org.opensearch.common.annotation.ExperimentalApi;
Expand All @@ -34,6 +35,7 @@
import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues;
import org.opensearch.index.mapper.CompositeMappedFieldType;
import org.opensearch.index.mapper.DocCountFieldMapper;
import org.opensearch.index.mapper.KeywordFieldMapper;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.index.mapper.StarTreeMapper;

Expand Down Expand Up @@ -82,14 +84,7 @@ public Composite99DocValuesWriter(DocValuesConsumer delegate, SegmentWriteState
this.compositeMappedFieldTypes = mapperService.getCompositeFieldTypes();
compositeFieldSet = new HashSet<>();
segmentFieldSet = new HashSet<>();
// TODO : add integ test for this
for (FieldInfo fi : this.state.fieldInfos) {
if (DocValuesType.SORTED_NUMERIC.equals(fi.getDocValuesType())) {
segmentFieldSet.add(fi.name);
} else if (fi.name.equals(DocCountFieldMapper.NAME)) {
segmentFieldSet.add(fi.name);
}
}
addStarTreeSupportedFieldsFromSegment();
for (CompositeMappedFieldType type : compositeMappedFieldTypes) {
compositeFieldSet.addAll(type.fields());
}
Expand Down Expand Up @@ -148,6 +143,19 @@ public Composite99DocValuesWriter(DocValuesConsumer delegate, SegmentWriteState
segmentHasCompositeFields = Collections.disjoint(segmentFieldSet, compositeFieldSet) == false;
}

private void addStarTreeSupportedFieldsFromSegment() {
// TODO : add integ test for this
for (FieldInfo fi : this.state.fieldInfos) {
if (DocValuesType.SORTED_NUMERIC.equals(fi.getDocValuesType())) {
segmentFieldSet.add(fi.name);
} else if (DocValuesType.SORTED_SET.equals(fi.getDocValuesType())) {
segmentFieldSet.add(fi.name);
} else if (fi.name.equals(DocCountFieldMapper.NAME)) {
segmentFieldSet.add(fi.name);
}
}
}

@Override
public void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
delegate.addNumericField(field, valuesProducer);
Expand Down Expand Up @@ -179,6 +187,10 @@ public void addSortedNumericField(FieldInfo field, DocValuesProducer valuesProdu
@Override
public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
delegate.addSortedSetField(field, valuesProducer);
// Perform this only during flush flow
if (mergeState.get() == null && segmentHasCompositeFields) {
createCompositeIndicesIfPossible(valuesProducer, field);
}
}

@Override
Expand Down Expand Up @@ -235,6 +247,7 @@ private void createCompositeIndicesIfPossible(DocValuesProducer valuesProducer,
* Add empty doc values for fields not present in segment
*/
private void addDocValuesForEmptyField(String compositeField) {
// special case for doc count
if (compositeField.equals(DocCountFieldMapper.NAME)) {
fieldProducerMap.put(compositeField, new EmptyDocValuesProducer() {
@Override
Expand All @@ -243,16 +256,29 @@ public NumericDocValues getNumeric(FieldInfo field) {
}
});
} else {
fieldProducerMap.put(compositeField, new EmptyDocValuesProducer() {
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) {
return DocValues.emptySortedNumeric();
}
});
if(isSortedSetField(compositeField)) {
fieldProducerMap.put(compositeField, new EmptyDocValuesProducer() {
@Override
public SortedSetDocValues getSortedSet(FieldInfo field) {
return DocValues.emptySortedSet();
}
});
} else {
fieldProducerMap.put(compositeField, new EmptyDocValuesProducer() {
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) {
return DocValues.emptySortedNumeric();
}
});
}
}
compositeFieldSet.remove(compositeField);
}

private boolean isSortedSetField(String field) {
return mapperService.fieldType(field) instanceof KeywordFieldMapper.KeywordFieldType;
}

@Override
public void merge(MergeState mergeState) throws IOException {
this.mergeState.compareAndSet(null, mergeState);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.util.stream.Collectors;

import static org.opensearch.index.compositeindex.datacube.DateDimension.CALENDAR_INTERVALS;
import static org.opensearch.index.compositeindex.datacube.KeywordDimension.KEYWORD;

/**
* Dimension factory class mainly used to parse and create dimension from the mappings
Expand All @@ -43,6 +44,8 @@ public static Dimension parseAndCreateDimension(
return parseAndCreateDateDimension(name, dimensionMap, c);
case NumericDimension.NUMERIC:
return new NumericDimension(name);
case KEYWORD:
return new KeywordDimension(name);
default:
throw new IllegalArgumentException(
String.format(Locale.ROOT, "unsupported field type associated with dimension [%s] as part of star tree field", name)
Expand All @@ -56,16 +59,23 @@ public static Dimension parseAndCreateDimension(
Map<String, Object> dimensionMap,
Mapper.TypeParser.ParserContext c
) {
if (builder.getSupportedDataCubeDimensionType().isPresent()
&& builder.getSupportedDataCubeDimensionType().get().equals(DimensionType.DATE)) {
return parseAndCreateDateDimension(name, dimensionMap, c);
} else if (builder.getSupportedDataCubeDimensionType().isPresent()
&& builder.getSupportedDataCubeDimensionType().get().equals(DimensionType.NUMERIC)) {
if (builder.getSupportedDataCubeDimensionType().isEmpty()) {
throw new IllegalArgumentException(
String.format(Locale.ROOT, "unsupported field type associated with star tree dimension [%s]", name)
);
}
switch (builder.getSupportedDataCubeDimensionType().get()) {
case DATE:
return parseAndCreateDateDimension(name, dimensionMap, c);
case NUMERIC:
return new NumericDimension(name);
}
throw new IllegalArgumentException(
String.format(Locale.ROOT, "unsupported field type associated with star tree dimension [%s]", name)
);
case KEYWORD:
return new KeywordDimension(name);
default:
throw new IllegalArgumentException(
String.format(Locale.ROOT, "unsupported field type associated with star tree dimension [%s]", name)
);
}
}

private static DateDimension parseAndCreateDateDimension(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,11 @@ public enum DimensionType {
* Represents a date dimension type.
* This is used for dimensions that contain date or timestamp values.
*/
DATE
DATE,

/**
* Represents a keyword dimension type.
* This is used for dimensions that contain keyword ordinals.
*/
KEYWORD
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.compositeindex.datacube;

import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.index.mapper.CompositeDataCubeFieldType;

import java.io.IOException;
import java.util.List;
import java.util.Objects;

/**
* Composite index keyword dimension class
*
* @opensearch.experimental
*/
@ExperimentalApi
public class KeywordDimension implements Dimension {
public static final String KEYWORD = "keyword";
private final String field;

public KeywordDimension(String field) {
this.field = field;
}

@Override
public String getField() {
return field;
}

@Override
public int getNumSubDimensions() {
return 1;
}

@Override
public int setDimensionValues(Long value, Long[] dims, int index) {
dims[index++] = value;
return index;
}

@Override
public List<String> getDimensionFieldsNames() {
return List.of(field);
}

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
builder.field(CompositeDataCubeFieldType.NAME, field);
builder.field(CompositeDataCubeFieldType.TYPE, KEYWORD);
builder.endObject();
return builder;
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
KeywordDimension dimension = (KeywordDimension) o;
return Objects.equals(field, dimension.getField());
}

@Override
public int hashCode() {
return Objects.hash(field);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RandomAccessInput;
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.common.util.io.IOUtils;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeDocument;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeField;
Expand Down Expand Up @@ -45,7 +46,9 @@
* <p>The set of 'star-tree.documents' files is maintained, and a tracker array is used to keep track of the start document ID for each file.
* Once the number of files reaches a set threshold, the files are merged.
*
* @opensearch.experimental
*/
@ExperimentalApi
public class StarTreeDocsFileManager extends AbstractDocumentsFileManager implements Closeable {
private static final Logger logger = LogManager.getLogger(StarTreeDocsFileManager.class);
private static final String STAR_TREE_DOC_FILE_NAME = "star-tree.documents";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
import org.opensearch.core.xcontent.XContentParser;
import org.opensearch.index.analysis.IndexAnalyzers;
import org.opensearch.index.analysis.NamedAnalyzer;
import org.opensearch.index.compositeindex.datacube.DimensionType;
import org.opensearch.index.fielddata.IndexFieldData;
import org.opensearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData;
import org.opensearch.index.query.QueryShardContext;
Expand All @@ -73,6 +74,7 @@
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Supplier;

import static org.opensearch.search.SearchService.ALLOW_EXPENSIVE_QUERIES;
Expand Down Expand Up @@ -254,6 +256,11 @@ public KeywordFieldMapper build(BuilderContext context) {
this
);
}

@Override
public Optional<DimensionType> getSupportedDataCubeDimensionType() {
return Optional.of(DimensionType.KEYWORD);
}
}

public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.getIndexAnalyzers()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4264,9 +4264,15 @@ public void testFlushFlowWithTimestamps() throws IOException {

compositeField = getStarTreeFieldWithDateDimension();
SortedNumericStarTreeValuesIterator d1sndv = new SortedNumericStarTreeValuesIterator(getSortedNumericMock(dimList, docsWithField));
SortedNumericStarTreeValuesIterator d2sndv = new SortedNumericStarTreeValuesIterator(getSortedNumericMock(dimList2, docsWithField2));
SortedNumericStarTreeValuesIterator m1sndv = new SortedNumericStarTreeValuesIterator(getSortedNumericMock(metricsList, metricsWithField));
SortedNumericStarTreeValuesIterator m2sndv = new SortedNumericStarTreeValuesIterator(getSortedNumericMock(metricsList, metricsWithField));
SortedNumericStarTreeValuesIterator d2sndv = new SortedNumericStarTreeValuesIterator(
getSortedNumericMock(dimList2, docsWithField2)
);
SortedNumericStarTreeValuesIterator m1sndv = new SortedNumericStarTreeValuesIterator(
getSortedNumericMock(metricsList, metricsWithField)
);
SortedNumericStarTreeValuesIterator m2sndv = new SortedNumericStarTreeValuesIterator(
getSortedNumericMock(metricsList, metricsWithField)
);
this.docValuesConsumer = LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec(
writeState,
Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC,
Expand Down
Loading

0 comments on commit 49dd2f5

Please sign in to comment.