Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Star tree] Support for keyword changes as part of star tree mapper #16103

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Add _list/shards API as paginated alternate to _cat/shards ([#14641](https://github.com/opensearch-project/OpenSearch/pull/14641))
- Latency and Memory allocation improvements to Multi Term Aggregation queries ([#14993](https://github.com/opensearch-project/OpenSearch/pull/14993))
- Flat object field use IndexOrDocValuesQuery to optimize query ([#14383](https://github.com/opensearch-project/OpenSearch/issues/14383))

- [Star tree] Adding date field rounding support in star tree ([#15249](https://github.com/opensearch-project/OpenSearch/issues/15249))
- [Star tree] Support for keyword changes as part of star tree mapper ([#16103](https://github.com/opensearch-project/OpenSearch/issues/16103))
### Dependencies
- Bump `com.azure:azure-identity` from 1.13.0 to 1.13.2 ([#15578](https://github.com/opensearch-project/OpenSearch/pull/15578))
- Bump `protobuf` from 3.22.3 to 3.25.4 ([#15684](https://github.com/opensearch-project/OpenSearch/pull/15684))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public class StarTreeMapperIT extends OpenSearchIntegTestCase {
.put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(512, ByteSizeUnit.MB))
.build();

private static XContentBuilder createMinimalTestMapping(boolean invalidDim, boolean invalidMetric, boolean keywordDim) {
private static XContentBuilder createMinimalTestMapping(boolean invalidDim, boolean invalidMetric, boolean ipdim) {
try {
return jsonBuilder().startObject()
.startObject("composite")
Expand All @@ -68,12 +68,15 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
.endObject()
.startArray("ordered_dimensions")
.startObject()
.field("name", getDim(invalidDim, keywordDim))
.field("name", getDim(invalidDim, ipdim))
.endObject()
.startObject()
.field("name", "keyword_dv")
.endObject()
.endArray()
.startArray("metrics")
.startObject()
.field("name", getDim(invalidMetric, false))
.field("name", getMetric(invalidMetric, false))
.endObject()
.endArray()
.endObject()
Expand All @@ -99,6 +102,10 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
.field("type", "keyword")
.field("doc_values", false)
.endObject()
.startObject("ip")
.field("type", "ip")
.field("doc_values", false)
.endObject()
.endObject()
.endObject();
} catch (IOException e) {
Expand Down Expand Up @@ -356,10 +363,19 @@ private XContentBuilder getMappingWithDuplicateFields(boolean isDuplicateDim, bo
}

// Picks the dimension field name for the generated test mapping.
// When hasDocValues is true, one of the fields mapped without doc values
// ("numeric" or "keyword") is chosen at random so the mapper rejects it;
// when isKeyword is true, the "ip" field is used (unsupported dimension type
// in this test — presumably to exercise the type-validation path; confirm
// against the mapping definition); otherwise the valid "numeric_dv" field.
private static String getDim(boolean hasDocValues, boolean isKeyword) {
    if (hasDocValues) {
        boolean pickNumeric = random().nextBoolean();
        return pickNumeric ? "numeric" : "keyword";
    }
    return isKeyword ? "ip" : "numeric_dv";
}

private static String getMetric(boolean hasDocValues, boolean isKeyword) {
if (hasDocValues) {
return "numeric";
} else if (isKeyword) {
return "keyword";
return "ip";
bharath-techie marked this conversation as resolved.
Show resolved Hide resolved
}
return "numeric_dv";
}
Expand Down Expand Up @@ -398,6 +414,7 @@ public void testValidCompositeIndex() {
assertEquals(expectedTimeUnits.get(i).shortName(), dateDim.getSortedCalendarIntervals().get(i).shortName());
}
assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField());
assertEquals("keyword_dv", starTreeFieldType.getDimensions().get(2).getField());
assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField());
List<MetricStat> expectedMetrics = Arrays.asList(MetricStat.VALUE_COUNT, MetricStat.SUM, MetricStat.AVG);
assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics());
Expand Down Expand Up @@ -665,10 +682,7 @@ public void testInvalidDimCompositeIndex() {
IllegalArgumentException.class,
() -> prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(true, false, false)).get()
);
assertEquals(
"Aggregations not supported for the dimension field [numeric] with field type [integer] as part of star tree field",
ex.getMessage()
);
assertTrue(ex.getMessage().startsWith("Aggregations not supported for the dimension field "));
}

public void testMaxDimsCompositeIndex() {
Expand Down Expand Up @@ -734,7 +748,7 @@ public void testUnsupportedDim() {
() -> prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(false, false, true)).get()
);
assertEquals(
"Failed to parse mapping [_doc]: unsupported field type associated with dimension [keyword] as part of star tree field [startree-1]",
"Failed to parse mapping [_doc]: unsupported field type associated with dimension [ip] as part of star tree field [startree-1]",
ex.getMessage()
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.util.stream.Collectors;

import static org.opensearch.index.compositeindex.datacube.DateDimension.CALENDAR_INTERVALS;
import static org.opensearch.index.compositeindex.datacube.KeywordDimension.KEYWORD;

/**
* Dimension factory class mainly used to parse and create dimension from the mappings
Expand All @@ -43,6 +44,8 @@
return parseAndCreateDateDimension(name, dimensionMap, c);
case NumericDimension.NUMERIC:
return new NumericDimension(name);
case KEYWORD:
return new KeywordDimension(name);
default:
throw new IllegalArgumentException(
String.format(Locale.ROOT, "unsupported field type associated with dimension [%s] as part of star tree field", name)
Expand All @@ -56,16 +59,23 @@
Map<String, Object> dimensionMap,
Mapper.TypeParser.ParserContext c
) {
if (builder.getSupportedDataCubeDimensionType().isPresent()
&& builder.getSupportedDataCubeDimensionType().get().equals(DimensionType.DATE)) {
return parseAndCreateDateDimension(name, dimensionMap, c);
} else if (builder.getSupportedDataCubeDimensionType().isPresent()
&& builder.getSupportedDataCubeDimensionType().get().equals(DimensionType.NUMERIC)) {
if (builder.getSupportedDataCubeDimensionType().isEmpty()) {
throw new IllegalArgumentException(
String.format(Locale.ROOT, "unsupported field type associated with star tree dimension [%s]", name)
);
}
switch (builder.getSupportedDataCubeDimensionType().get()) {
case DATE:
return parseAndCreateDateDimension(name, dimensionMap, c);
case NUMERIC:
return new NumericDimension(name);
}
throw new IllegalArgumentException(
String.format(Locale.ROOT, "unsupported field type associated with star tree dimension [%s]", name)
);
case KEYWORD:
return new KeywordDimension(name);
default:
throw new IllegalArgumentException(
String.format(Locale.ROOT, "unsupported field type associated with star tree dimension [%s]", name)

Check warning on line 76 in server/src/main/java/org/opensearch/index/compositeindex/datacube/DimensionFactory.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/compositeindex/datacube/DimensionFactory.java#L75-L76

Added lines #L75 - L76 were not covered by tests
);
}
}

private static DateDimension parseAndCreateDateDimension(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,11 @@ public enum DimensionType {
* Represents a date dimension type.
* This is used for dimensions that contain date or timestamp values.
*/
DATE
DATE,

/**
* Represents a keyword dimension type.
* This is used for dimensions that contain keyword ordinals.
*/
KEYWORD
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * The OpenSearch Contributors require contributions made to
 * this file be licensed under the Apache-2.0 license or a
 * compatible open source license.
 */

package org.opensearch.index.compositeindex.datacube;

import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.index.mapper.CompositeDataCubeFieldType;

import java.io.IOException;
import java.util.List;
import java.util.Objects;
import java.util.function.Consumer;

/**
 * Composite index keyword dimension class.
 * <p>
 * Represents a single-valued keyword dimension of a star-tree field; the
 * dimension maps one-to-one onto the underlying field (no sub-dimensions
 * beyond itself).
 *
 * @opensearch.experimental
 */
@ExperimentalApi
public class KeywordDimension implements Dimension {
    /** Type name used in mappings and in {@link #toXContent}. */
    public static final String KEYWORD = "keyword";

    /** Name of the backing keyword field. */
    private final String field;

    public KeywordDimension(String field) {
        this.field = field;
    }

    @Override
    public String getField() {
        return field;
    }

    /** A keyword dimension contributes exactly one sub-dimension (itself). */
    @Override
    public int getNumSubDimensions() {
        return 1;
    }

    /**
     * Forwards the single dimension value (presumably a doc-values ordinal —
     * confirm against the star-tree builder) to the consumer unchanged.
     */
    @Override
    public void setDimensionValues(Long value, Consumer<Long> dimSetter) {
        dimSetter.accept(value);
    }

    @Override
    public List<String> getSubDimensionNames() {
        return List.of(field);
    }

    /** Serializes as {@code {"name": <field>, "type": "keyword"}}. */
    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject();
        builder.field(CompositeDataCubeFieldType.NAME, field);
        builder.field(CompositeDataCubeFieldType.TYPE, KEYWORD);
        builder.endObject();
        return builder;
    }

    /** Equality is based solely on the backing field name. */
    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        KeywordDimension dimension = (KeywordDimension) o;
        return Objects.equals(field, dimension.getField());
    }

    @Override
    public int hashCode() {
        return Objects.hash(field);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RandomAccessInput;
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.common.util.io.IOUtils;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeDocument;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeField;
Expand Down Expand Up @@ -45,7 +46,9 @@
* <p>The set of 'star-tree.documents' files is maintained, and a tracker array is used to keep track of the start document ID for each file.
* Once the number of files reaches a set threshold, the files are merged.
*
* @opensearch.experimental
*/
@ExperimentalApi
public class StarTreeDocsFileManager extends AbstractDocumentsFileManager implements Closeable {
private static final Logger logger = LogManager.getLogger(StarTreeDocsFileManager.class);
private static final String STAR_TREE_DOC_FILE_NAME = "star-tree.documents";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
import org.opensearch.core.xcontent.XContentParser;
import org.opensearch.index.analysis.IndexAnalyzers;
import org.opensearch.index.analysis.NamedAnalyzer;
import org.opensearch.index.compositeindex.datacube.DimensionType;
import org.opensearch.index.fielddata.IndexFieldData;
import org.opensearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData;
import org.opensearch.index.query.QueryShardContext;
Expand All @@ -73,6 +74,7 @@
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Supplier;

import static org.opensearch.search.SearchService.ALLOW_EXPENSIVE_QUERIES;
Expand Down Expand Up @@ -254,6 +256,11 @@ public KeywordFieldMapper build(BuilderContext context) {
this
);
}

@Override
public Optional<DimensionType> getSupportedDataCubeDimensionType() {
return Optional.of(DimensionType.KEYWORD);
}
}

public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.getIndexAnalyzers()));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.compositeindex.datacube;

import org.opensearch.common.settings.Settings;
import org.opensearch.index.mapper.Mapper;
import org.opensearch.test.OpenSearchTestCase;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

/**
 * Unit tests for {@code DimensionFactory}: creation of date, numeric and
 * keyword dimensions from mapping parameters, builder-driven creation, and
 * rejection of unsupported types.
 */
public class DimensionFactoryTests extends OpenSearchTestCase {

    public void testParseAndCreateDateDimension() {
        // Dimension config carrying two calendar intervals.
        String fieldName = "dateDimension";
        List<String> intervals = Arrays.asList("day", "month");
        Map<String, Object> config = new HashMap<>();
        config.put("calendar_intervals", intervals);

        // The factory only reads settings from the parser context.
        Mapper.TypeParser.ParserContext parserContext = mock(Mapper.TypeParser.ParserContext.class);
        when(parserContext.getSettings()).thenReturn(Settings.EMPTY);

        Dimension created = DimensionFactory.parseAndCreateDimension(fieldName, DateDimension.DATE, config, parserContext);

        assertTrue(created instanceof DateDimension);
        assertEquals(fieldName, created.getField());
        // One sub-dimension per configured interval, named "<field>_<interval>".
        assertEquals(2, created.getNumSubDimensions());
        for (String interval : intervals) {
            assertTrue(created.getSubDimensionNames().contains(fieldName + "_" + interval));
        }
        assertEquals(2, ((DateDimension) created).getIntervals().size());
    }

    public void testParseAndCreateNumericDimension() {
        String fieldName = "numericDimension";

        Dimension created = DimensionFactory.parseAndCreateDimension(fieldName, NumericDimension.NUMERIC, new HashMap<>(), null);

        assertTrue(created instanceof NumericDimension);
        assertEquals(fieldName, created.getField());
        assertEquals(1, created.getNumSubDimensions());
        assertTrue(created.getSubDimensionNames().contains(fieldName));
    }

    public void testParseAndCreateKeywordDimension() {
        String fieldName = "keywordDimension";

        Dimension created = DimensionFactory.parseAndCreateDimension(fieldName, KeywordDimension.KEYWORD, new HashMap<>(), null);

        assertTrue(created instanceof KeywordDimension);
        assertEquals(fieldName, created.getField());
        assertEquals(1, created.getNumSubDimensions());
        assertTrue(created.getSubDimensionNames().contains(fieldName));
        // Factory output must equal a directly constructed instance.
        KeywordDimension expected = new KeywordDimension(fieldName);
        assertEquals(created, expected);
        // setDimensionValues forwards the value to the consumer unchanged.
        List<Long> captured = new ArrayList<>();
        expected.setDimensionValues(1L, captured::add);
        assertEquals((Long) 1L, captured.get(0));
    }

    public void testParseAndCreateDimensionWithUnsupportedType() {
        IllegalArgumentException exception = expectThrows(
            IllegalArgumentException.class,
            () -> DimensionFactory.parseAndCreateDimension("unsupportedDimension", "unsupported", new HashMap<>(), null)
        );
        assertTrue(exception.getMessage().contains("unsupported field type"));
    }

    public void testParseAndCreateDimensionWithBuilder() {
        String fieldName = "builderDimension";
        Mapper.Builder dimensionBuilder = mock(Mapper.Builder.class);
        when(dimensionBuilder.getSupportedDataCubeDimensionType()).thenReturn(java.util.Optional.of(DimensionType.KEYWORD));

        Dimension created = DimensionFactory.parseAndCreateDimension(fieldName, dimensionBuilder, new HashMap<>(), null);

        assertTrue(created instanceof KeywordDimension);
        assertEquals(fieldName, created.getField());
    }

    public void testParseAndCreateDimensionWithUnsupportedBuilder() {
        // A builder with no supported dimension type must be rejected.
        Mapper.Builder dimensionBuilder = mock(Mapper.Builder.class);
        when(dimensionBuilder.getSupportedDataCubeDimensionType()).thenReturn(java.util.Optional.empty());

        IllegalArgumentException exception = expectThrows(
            IllegalArgumentException.class,
            () -> DimensionFactory.parseAndCreateDimension("unsupportedBuilderDimension", dimensionBuilder, new HashMap<>(), null)
        );
        assertTrue(exception.getMessage().contains("unsupported field type"));
    }
}
Loading
Loading