diff --git a/CHANGELOG.md b/CHANGELOG.md index b6d28d37fde1f..bc0d8251933f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Introduce a setting to disable download of full cluster state from remote on term mismatch([#16798](https://github.com/opensearch-project/OpenSearch/pull/16798/)) - Added ability to retrieve value from DocValues in a flat_object filed([#16802](https://github.com/opensearch-project/OpenSearch/pull/16802)) - Improve performace of NumericTermAggregation by avoiding unnecessary sorting([#17252](https://github.com/opensearch-project/OpenSearch/pull/17252)) +- [Star Tree] [Search] Resolving keyword & numeric bucket aggregation with metric aggregation using star-tree ([#17165](https://github.com/opensearch-project/OpenSearch/pull/17165)) ### Dependencies - Bump `org.awaitility:awaitility` from 4.2.0 to 4.2.2 ([#17230](https://github.com/opensearch-project/OpenSearch/pull/17230)) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/BucketsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/BucketsAggregator.java index f075d67b0f48d..a65728b2d658a 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/BucketsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/BucketsAggregator.java @@ -136,6 +136,12 @@ public final void collectExistingBucket(LeafBucketCollector subCollector, int do */ public final void collectStarTreeBucket(StarTreeBucketCollector collector, long docCount, long bucketOrd, int entryBit) throws IOException { + if (bucketOrd < 0) { + bucketOrd = -1 - bucketOrd; + } else { + grow(bucketOrd + 1); + } + if (docCounts.increment(bucketOrd, docCount) == docCount) { multiBucketConsumer.accept(0); } diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index 2294ba6f9a2b5..54c092ff5cc42 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -33,18 +33,14 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedNumericDocValues; -import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.util.CollectionUtil; -import org.apache.lucene.util.FixedBitSet; import org.opensearch.common.Nullable; import org.opensearch.common.Rounding; import org.opensearch.common.lease.Releasables; import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; import org.opensearch.index.compositeindex.datacube.DateDimension; -import org.opensearch.index.compositeindex.datacube.MetricStat; import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; -import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils; import org.opensearch.index.compositeindex.datacube.startree.utils.date.DateTimeUnitAdapter; import org.opensearch.index.compositeindex.datacube.startree.utils.date.DateTimeUnitRounding; import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.SortedNumericStarTreeValuesIterator; @@ -192,9 +188,8 @@ public ScoreMode scoreMode() { protected boolean tryPrecomputeAggregationForLeaf(LeafReaderContext ctx) throws IOException { CompositeIndexFieldInfo supportedStarTree = getSupportedStarTree(this.context.getQueryShardContext()); if (supportedStarTree != null) { - if (preComputeWithStarTree(ctx, supportedStarTree) == true) { - return true; - } + StarTreeBucketCollector starTreeBucketCollector = getStarTreeBucketCollector(ctx, supportedStarTree, null); + StarTreeQueryHelper.preComputeBucketsWithStarTree(starTreeBucketCollector); } return filterRewriteOptimizationContext.tryOptimize(ctx, this::incrementBucketDocCount, segmentMatchAll(context, ctx)); } @@ -268,6 +263,10 @@ public StarTreeBucketCollector getStarTreeBucketCollector( ) throws IOException { assert parentCollector == null; StarTreeValues starTreeValues = StarTreeQueryHelper.getStarTreeValues(ctx, starTree); + SortedNumericStarTreeValuesIterator valuesIterator = (SortedNumericStarTreeValuesIterator) starTreeValues + .getDimensionValuesIterator(starTreeDateDimension); + SortedNumericStarTreeValuesIterator docCountsIterator = StarTreeQueryHelper.getDocCountsIterator(starTreeValues, starTree); + return new StarTreeBucketCollector( starTreeValues, StarTreeTraversalUtil.getStarTreeResult( @@ -287,17 +286,6 @@ public void setSubCollectors() throws IOException { } } - SortedNumericStarTreeValuesIterator valuesIterator = (SortedNumericStarTreeValuesIterator) starTreeValues - .getDimensionValuesIterator(starTreeDateDimension); - - String metricName = StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues( - starTree.getField(), - "_doc_count", - MetricStat.DOC_COUNT.getTypeName() - ); - SortedNumericStarTreeValuesIterator docCountsIterator = (SortedNumericStarTreeValuesIterator) starTreeValues - .getMetricValuesIterator(metricName); - @Override public void collectStarTreeEntry(int starTreeEntry, long owningBucketOrd) throws IOException { if (!valuesIterator.advanceExact(starTreeEntry)) { @@ -311,15 +299,8 @@ public void collectStarTreeEntry(int starTreeEntry, long owningBucketOrd) throws if (docCountsIterator.advanceExact(starTreeEntry)) { long metricValue = docCountsIterator.nextValue(); - long bucketOrd = bucketOrds.add(owningBucketOrd, dimensionValue); - if (bucketOrd < 0) { - bucketOrd = -1 - bucketOrd; - collectStarTreeBucket(this, metricValue, bucketOrd, starTreeEntry); - } else { - grow(bucketOrd + 1); - collectStarTreeBucket(this, metricValue, bucketOrd, starTreeEntry); - } + collectStarTreeBucket(this, metricValue, bucketOrd, starTreeEntry); } } } @@ -393,20 +374,4 @@ public double bucketSize(long bucket, Rounding.DateTimeUnit unitSize) { return 1.0; } } - - private boolean preComputeWithStarTree(LeafReaderContext ctx, CompositeIndexFieldInfo starTree) throws IOException { - StarTreeBucketCollector starTreeBucketCollector = getStarTreeBucketCollector(ctx, starTree, null); - FixedBitSet matchingDocsBitSet = starTreeBucketCollector.getMatchingDocsBitSet(); - - int numBits = matchingDocsBitSet.length(); - - if (numBits > 0) { - for (int bit = matchingDocsBitSet.nextSetBit(0); bit != DocIdSetIterator.NO_MORE_DOCS; bit = (bit + 1 < numBits) - ? matchingDocsBitSet.nextSetBit(bit + 1) - : DocIdSetIterator.NO_MORE_DOCS) { - starTreeBucketCollector.collectStarTreeEntry(bit, 0); - } - } - return true; - } } diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index ef925b7f6416a..03dbdbd290d8d 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -51,6 +51,10 @@ import org.opensearch.common.util.LongHash; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; +import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; +import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.SortedNumericStarTreeValuesIterator; +import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.SortedSetStarTreeValuesIterator; import org.opensearch.index.mapper.DocCountFieldMapper; import org.opensearch.search.DocValueFormat; import org.opensearch.search.aggregations.AggregationExecutionException; @@ -63,14 +67,20 @@ import org.opensearch.search.aggregations.InternalOrder; import org.opensearch.search.aggregations.LeafBucketCollector; import org.opensearch.search.aggregations.LeafBucketCollectorBase; +import org.opensearch.search.aggregations.StarTreeBucketCollector; +import org.opensearch.search.aggregations.StarTreePreComputeCollector; import org.opensearch.search.aggregations.bucket.LocalBucketCountThresholds; import org.opensearch.search.aggregations.bucket.terms.SignificanceLookup.BackgroundFrequencyForBytes; import org.opensearch.search.aggregations.bucket.terms.heuristic.SignificanceHeuristic; import org.opensearch.search.aggregations.support.ValuesSource; import org.opensearch.search.internal.SearchContext; +import org.opensearch.search.startree.StarTreeQueryHelper; +import org.opensearch.search.startree.StarTreeTraversalUtil; +import org.opensearch.search.startree.filter.DimensionFilter; import java.io.IOException; import java.util.Arrays; +import java.util.List; import java.util.Map; import java.util.function.BiConsumer; import java.util.function.Function; @@ -85,7 +95,7 @@ * * @opensearch.internal */ -public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggregator { +public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggregator implements StarTreePreComputeCollector { protected final ResultStrategy resultStrategy; protected final ValuesSource.Bytes.WithOrdinals valuesSource; @@ -97,6 +107,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr private final SetOnce dvs = new SetOnce<>(); protected int segmentsWithSingleValuedOrds = 0; protected int segmentsWithMultiValuedOrds = 0; + LongUnaryOperator globalOperator; /** * Lookup global ordinals @@ -228,6 +239,12 @@ protected boolean tryPrecomputeAggregationForLeaf(LeafReaderContext ctx) throws (ord, docCount) -> incrementBucketDocCount(collectionStrategy.globalOrdToBucketOrd(0, ord), docCount) ); } + CompositeIndexFieldInfo supportedStarTree = StarTreeQueryHelper.getSupportedStarTree(this.context.getQueryShardContext()); + if (supportedStarTree != null) { + globalOperator = valuesSource.globalOrdinalsMapping(ctx); + StarTreeBucketCollector starTreeBucketCollector = getStarTreeBucketCollector(ctx, supportedStarTree, null); + StarTreeQueryHelper.preComputeBucketsWithStarTree(starTreeBucketCollector); + } return false; } @@ -307,6 +324,56 @@ public void collect(int doc, long owningBucketOrd) throws IOException { }); } + public StarTreeBucketCollector getStarTreeBucketCollector( + LeafReaderContext ctx, + CompositeIndexFieldInfo starTree, + StarTreeBucketCollector parent + ) throws IOException { + assert parent == null; + StarTreeValues starTreeValues = StarTreeQueryHelper.getStarTreeValues(ctx, starTree); + SortedSetStarTreeValuesIterator valuesIterator = (SortedSetStarTreeValuesIterator) starTreeValues.getDimensionValuesIterator( + fieldName + ); + SortedNumericStarTreeValuesIterator docCountsIterator = StarTreeQueryHelper.getDocCountsIterator(starTreeValues, starTree); + + return new StarTreeBucketCollector( + starTreeValues, + StarTreeTraversalUtil.getStarTreeResult( + starTreeValues, + StarTreeQueryHelper.mergeDimensionFilterIfNotExists( + context.getQueryShardContext().getStarTreeQueryContext().getBaseQueryStarTreeFilter(), + fieldName, + List.of(DimensionFilter.MATCH_ALL_DEFAULT) + ), + context + ) + ) { + @Override + public void setSubCollectors() throws IOException { + for (Aggregator aggregator : subAggregators) { + this.subCollectors.add(((StarTreePreComputeCollector) aggregator).getStarTreeBucketCollector(ctx, starTree, this)); + } + } + + @Override + public void collectStarTreeEntry(int starTreeEntry, long owningBucketOrd) throws IOException { + if (valuesIterator.advanceExact(starTreeEntry) == false) { + return; + } + for (int i = 0, count = valuesIterator.docValueCount(); i < count; i++) { + long dimensionValue = valuesIterator.value(); + long ord = globalOperator.applyAsLong(dimensionValue); + + if (docCountsIterator.advanceExact(starTreeEntry)) { + long metricValue = docCountsIterator.nextValue(); + long bucketOrd = collectionStrategy.globalOrdToBucketOrd(0, ord); + collectStarTreeBucket(this, metricValue, bucketOrd, starTreeEntry); + } + } + } + }; + } + @Override public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { return resultStrategy.buildAggregations(owningBucketOrds); diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/NumericTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/NumericTermsAggregator.java index 1d78a59a563f0..635180870733d 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/NumericTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/NumericTermsAggregator.java @@ -41,7 +41,11 @@ import org.opensearch.common.lease.Releasable; import org.opensearch.common.lease.Releasables; import org.opensearch.common.util.LongArray; +import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; +import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; +import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.SortedNumericStarTreeValuesIterator; import org.opensearch.index.fielddata.FieldData; +import org.opensearch.index.mapper.NumberFieldMapper; import org.opensearch.search.DocValueFormat; import org.opensearch.search.aggregations.Aggregator; import org.opensearch.search.aggregations.AggregatorFactories; @@ -52,6 +56,8 @@ import org.opensearch.search.aggregations.InternalOrder; import org.opensearch.search.aggregations.LeafBucketCollector; import org.opensearch.search.aggregations.LeafBucketCollectorBase; +import org.opensearch.search.aggregations.StarTreeBucketCollector; +import org.opensearch.search.aggregations.StarTreePreComputeCollector; import org.opensearch.search.aggregations.bucket.LocalBucketCountThresholds; import org.opensearch.search.aggregations.bucket.terms.IncludeExclude.LongFilter; import org.opensearch.search.aggregations.bucket.terms.LongKeyedBucketOrds.BucketOrdsEnum; @@ -60,6 +66,9 @@ import org.opensearch.search.aggregations.support.ValuesSource; import org.opensearch.search.internal.ContextIndexSearcher; import org.opensearch.search.internal.SearchContext; +import org.opensearch.search.startree.StarTreeQueryHelper; +import org.opensearch.search.startree.StarTreeTraversalUtil; +import org.opensearch.search.startree.filter.DimensionFilter; import java.io.IOException; import java.math.BigInteger; @@ -79,11 +88,12 @@ * * @opensearch.internal */ -public class NumericTermsAggregator extends TermsAggregator { +public class NumericTermsAggregator extends TermsAggregator implements StarTreePreComputeCollector { private final ResultStrategy resultStrategy; private final ValuesSource.Numeric valuesSource; private final LongKeyedBucketOrds bucketOrds; private final LongFilter longFilter; + private final String fieldName; public NumericTermsAggregator( String name, @@ -105,6 +115,9 @@ public NumericTermsAggregator( this.valuesSource = valuesSource; this.longFilter = longFilter; bucketOrds = LongKeyedBucketOrds.build(context.bigArrays(), cardinality); + this.fieldName = (this.valuesSource instanceof ValuesSource.Numeric.FieldData) + ? ((ValuesSource.Numeric.FieldData) valuesSource).getIndexFieldName() + : null; } @Override @@ -146,6 +159,72 @@ public void collect(int doc, long owningBucketOrd) throws IOException { }); } + protected boolean tryPrecomputeAggregationForLeaf(LeafReaderContext ctx) throws IOException { + CompositeIndexFieldInfo supportedStarTree = StarTreeQueryHelper.getSupportedStarTree(this.context.getQueryShardContext()); + if (supportedStarTree != null) { + StarTreeBucketCollector starTreeBucketCollector = getStarTreeBucketCollector(ctx, supportedStarTree, null); + StarTreeQueryHelper.preComputeBucketsWithStarTree(starTreeBucketCollector); + } + return false; + } + + public StarTreeBucketCollector getStarTreeBucketCollector( + LeafReaderContext ctx, + CompositeIndexFieldInfo starTree, + StarTreeBucketCollector parent + ) throws IOException { + assert parent == null; + StarTreeValues starTreeValues = StarTreeQueryHelper.getStarTreeValues(ctx, starTree); + SortedNumericStarTreeValuesIterator valuesIterator = (SortedNumericStarTreeValuesIterator) starTreeValues + .getDimensionValuesIterator(fieldName); + SortedNumericStarTreeValuesIterator docCountsIterator = StarTreeQueryHelper.getDocCountsIterator(starTreeValues, starTree); + + return new StarTreeBucketCollector( + starTreeValues, + StarTreeTraversalUtil.getStarTreeResult( + starTreeValues, + StarTreeQueryHelper.mergeDimensionFilterIfNotExists( + context.getQueryShardContext().getStarTreeQueryContext().getBaseQueryStarTreeFilter(), + fieldName, + List.of(DimensionFilter.MATCH_ALL_DEFAULT) + ), + context + ) + ) { + @Override + public void setSubCollectors() throws IOException { + for (Aggregator aggregator : subAggregators) { + this.subCollectors.add(((StarTreePreComputeCollector) aggregator).getStarTreeBucketCollector(ctx, starTree, this)); + } + } + + @Override + public void collectStarTreeEntry(int starTreeEntry, long owningBucketOrd) throws IOException { + if (valuesIterator.advanceExact(starTreeEntry) == false) { + return; + } + long dimensionValue = valuesIterator.nextValue(); + // Only numeric & floating points are supported as of now in star-tree + // TODO: Add support for isBigInteger() when it gets supported in star-tree + if (valuesSource.isFloatingPoint()) { + double doubleValue = ((NumberFieldMapper.NumberFieldType) context.mapperService().fieldType(fieldName)).toDoubleValue( + dimensionValue + ); + dimensionValue = NumericUtils.doubleToSortableLong(doubleValue); + } + + for (int i = 0, count = valuesIterator.entryValueCount(); i < count; i++) { + + if (docCountsIterator.advanceExact(starTreeEntry)) { + long metricValue = docCountsIterator.nextValue(); + long bucketOrd = bucketOrds.add(owningBucketOrd, dimensionValue); + collectStarTreeBucket(this, metricValue, bucketOrd, starTreeEntry); + } + } + } + }; + } + @Override public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { return resultStrategy.buildAggregations(owningBucketOrds); diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregator.java index 918cc0276ed13..e73012d83ea14 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregator.java @@ -291,6 +291,10 @@ private boolean subAggsNeedScore() { @Override protected boolean shouldDefer(Aggregator aggregator) { - return collectMode == SubAggCollectionMode.BREADTH_FIRST && !aggsUsedForSorting.contains(aggregator); + // don't defer when StarTreeContext is set, don't defer when collectMode == SubAggCollectionMode.BREADTH_FIRST + // this boolean condition can be further simplified but affects readability. + return (context.getQueryShardContext().getStarTreeQueryContext() == null || collectMode != SubAggCollectionMode.BREADTH_FIRST) + && collectMode == SubAggCollectionMode.BREADTH_FIRST + && !aggsUsedForSorting.contains(aggregator); } } diff --git a/server/src/main/java/org/opensearch/search/startree/StarTreeQueryContext.java b/server/src/main/java/org/opensearch/search/startree/StarTreeQueryContext.java index ca0ab9ce52f6e..a7b7cf62ff3be 100644 --- a/server/src/main/java/org/opensearch/search/startree/StarTreeQueryContext.java +++ b/server/src/main/java/org/opensearch/search/startree/StarTreeQueryContext.java @@ -21,6 +21,7 @@ import org.opensearch.index.query.QueryBuilder; import org.opensearch.search.aggregations.AggregatorFactory; import org.opensearch.search.aggregations.bucket.histogram.DateHistogramAggregatorFactory; +import org.opensearch.search.aggregations.bucket.terms.TermsAggregatorFactory; import org.opensearch.search.aggregations.metrics.MetricAggregatorFactory; import org.opensearch.search.internal.SearchContext; import org.opensearch.search.startree.filter.StarTreeFilter; @@ -113,6 +114,13 @@ public boolean consolidateAllFilters(SearchContext context) { if (validateDateHistogramSupport(compositeMappedFieldType, aggregatorFactory)) { continue; } + + // validation for terms aggregation + if (validateKeywordTermsAggregationSupport(compositeMappedFieldType, aggregatorFactory)) { + continue; + } + + // invalid query shape return false; } @@ -151,6 +159,32 @@ private static boolean validateStarTreeMetricSupport( return false; } + private static boolean validateKeywordTermsAggregationSupport( + CompositeDataCubeFieldType compositeIndexFieldInfo, + AggregatorFactory aggregatorFactory + ) { + if (!(aggregatorFactory instanceof TermsAggregatorFactory termsAggregatorFactory) + || aggregatorFactory.getSubFactories().getFactories().length < 1) { + return false; + } + + // Validate request field is part of dimensions + if (compositeIndexFieldInfo.getDimensions() + .stream() + .map(Dimension::getField) + .noneMatch(termsAggregatorFactory.getField()::equals)) { + return false; + } + + // Validate all sub-factories + for (AggregatorFactory subFactory : aggregatorFactory.getSubFactories().getFactories()) { + if (!validateStarTreeMetricSupport(compositeIndexFieldInfo, subFactory)) { + return false; + } + } + return true; + } + private StarTreeFilter getStarTreeFilter( SearchContext context, QueryBuilder queryBuilder, diff --git a/server/src/main/java/org/opensearch/search/startree/StarTreeQueryHelper.java b/server/src/main/java/org/opensearch/search/startree/StarTreeQueryHelper.java index 0e3bc220461b9..68a613a373edf 100644 --- a/server/src/main/java/org/opensearch/search/startree/StarTreeQueryHelper.java +++ b/server/src/main/java/org/opensearch/search/startree/StarTreeQueryHelper.java @@ -16,9 +16,11 @@ import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; import org.opensearch.index.codec.composite.CompositeIndexReader; import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.MetricStat; import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils; import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.SortedNumericStarTreeValuesIterator; +import org.opensearch.index.mapper.DocCountFieldMapper; import org.opensearch.index.query.QueryShardContext; import org.opensearch.search.aggregations.StarTreeBucketCollector; import org.opensearch.search.aggregations.support.ValuesSource; @@ -177,6 +179,37 @@ public void collectStarTreeEntry(int starTreeEntryBit, long bucket) throws IOExc }; } + /** + * Fetches the metric values iterator for document counts from StarTreeValues. + */ + public static SortedNumericStarTreeValuesIterator getDocCountsIterator( + StarTreeValues starTreeValues, + CompositeIndexFieldInfo starTree + ) { + String metricName = StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues( + starTree.getField(), + DocCountFieldMapper.NAME, + MetricStat.DOC_COUNT.getTypeName() + ); + return (SortedNumericStarTreeValuesIterator) starTreeValues.getMetricValuesIterator(metricName); + } + + /** + * For a StarTreeBucketCollector, get matching star-tree entries and update relevant buckets in aggregator + */ + public static void preComputeBucketsWithStarTree(StarTreeBucketCollector starTreeBucketCollector) throws IOException { + FixedBitSet matchingDocsBitSet = starTreeBucketCollector.getMatchingDocsBitSet(); + int numBits = matchingDocsBitSet.length(); + + if (numBits > 0) { + for (int bit = matchingDocsBitSet.nextSetBit(0); bit != DocIdSetIterator.NO_MORE_DOCS; bit = (bit + 1 < numBits) + ? matchingDocsBitSet.nextSetBit(bit + 1) + : DocIdSetIterator.NO_MORE_DOCS) { + starTreeBucketCollector.collectStarTreeEntry(bit, 0); + } + } + } + public static StarTreeFilter mergeDimensionFilterIfNotExists( StarTreeFilter baseStarTreeFilter, String dimensionToMerge, diff --git a/server/src/test/java/org/opensearch/search/SearchServiceStarTreeTests.java b/server/src/test/java/org/opensearch/search/SearchServiceStarTreeTests.java index 93b133c0302c9..95c877bfce0a8 100644 --- a/server/src/test/java/org/opensearch/search/SearchServiceStarTreeTests.java +++ b/server/src/test/java/org/opensearch/search/SearchServiceStarTreeTests.java @@ -47,10 +47,12 @@ import org.opensearch.search.aggregations.SearchContextAggregations; import org.opensearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder; import org.opensearch.search.aggregations.bucket.histogram.DateHistogramInterval; +import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.opensearch.search.aggregations.metrics.MaxAggregationBuilder; import org.opensearch.search.aggregations.metrics.MedianAbsoluteDeviationAggregationBuilder; import org.opensearch.search.aggregations.metrics.SumAggregationBuilder; import org.opensearch.search.aggregations.startree.DateHistogramAggregatorTests; +import org.opensearch.search.aggregations.startree.NumericTermsAggregatorTests; import org.opensearch.search.aggregations.startree.StarTreeFilterTests; import org.opensearch.search.aggregations.support.ValuesSourceAggregationBuilder; import org.opensearch.search.builder.SearchSourceBuilder; @@ -70,6 +72,7 @@ import static org.opensearch.search.aggregations.AggregationBuilders.max; import static org.opensearch.search.aggregations.AggregationBuilders.medianAbsoluteDeviation; import static org.opensearch.search.aggregations.AggregationBuilders.sum; +import static org.opensearch.search.aggregations.AggregationBuilders.terms; import static org.hamcrest.CoreMatchers.notNullValue; import static org.hamcrest.CoreMatchers.nullValue; import static org.mockito.Mockito.mock; @@ -539,6 +542,153 @@ public void testInvalidQueryParsingForDateHistogramAggregations() throws IOExcep setStarTreeIndexSetting(null); } + /** + * Test query parsing for bucket aggregations, with/without numeric term query + */ + public void testQueryParsingForBucketAggregations() throws IOException { + FeatureFlags.initializeFeatureFlags(Settings.builder().put(FeatureFlags.STAR_TREE_INDEX, true).build()); + setStarTreeIndexSetting("true"); + + Settings settings = Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true) + .put(IndexMetadata.INDEX_APPEND_ONLY_ENABLED_SETTING.getKey(), true) + .build(); + CreateIndexRequestBuilder builder = client().admin() + .indices() + .prepareCreate("test") + .setSettings(settings) + .setMapping(NumericTermsAggregatorTests.getExpandedMapping(1, false)); + createIndex("test", builder); + + IndicesService indicesService = getInstanceFromNode(IndicesService.class); + IndexService indexService = indicesService.indexServiceSafe(resolveIndex("test")); + IndexShard indexShard = indexService.getShard(0); + ShardSearchRequest request = new ShardSearchRequest( + OriginalIndices.NONE, + new SearchRequest().allowPartialSearchResults(true), + indexShard.shardId(), + 1, + new AliasFilter(null, Strings.EMPTY_ARRAY), + 1.0f, + -1, + null, + null + ); + String KEYWORD_FIELD = "clientip"; + String NUMERIC_FIELD = "size"; + + MaxAggregationBuilder maxAggNoSub = max("max").field(FIELD_NAME); + MaxAggregationBuilder sumAggNoSub = max("sum").field(FIELD_NAME); + SumAggregationBuilder sumAggSub = sum("sum").field(FIELD_NAME).subAggregation(maxAggNoSub); + MedianAbsoluteDeviationAggregationBuilder medianAgg = medianAbsoluteDeviation("median").field(FIELD_NAME); + + QueryBuilder baseQuery; + SearchContext searchContext = createSearchContext(indexService); + StarTreeFieldConfiguration starTreeFieldConfiguration = new StarTreeFieldConfiguration( + 1, + Collections.emptySet(), + StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP + ); + + // Case 1: MatchAllQuery and non-nested metric aggregations is nested within keyword term aggregation, should use star tree + TermsAggregationBuilder termsAggregationBuilder = terms("term").field(KEYWORD_FIELD).subAggregation(maxAggNoSub); + baseQuery = new MatchAllQueryBuilder(); + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder().size(0).query(baseQuery).aggregation(termsAggregationBuilder); + + assertStarTreeContext( + request, + sourceBuilder, + getStarTreeQueryContext( + searchContext, + starTreeFieldConfiguration, + "startree1", + -1, + List.of(new NumericDimension(NUMERIC_FIELD), new OrdinalDimension(KEYWORD_FIELD)), + List.of(new Metric(FIELD_NAME, List.of(MetricStat.SUM, MetricStat.MAX))), + baseQuery, + sourceBuilder, + true + ), + -1 + ); + + // Case 2: MatchAllQuery and non-nested metric aggregations is nested within numeric term aggregation, should use star tree + termsAggregationBuilder = terms("term").field(NUMERIC_FIELD).subAggregation(maxAggNoSub); + sourceBuilder = new SearchSourceBuilder().size(0).query(new MatchAllQueryBuilder()).aggregation(termsAggregationBuilder); + assertStarTreeContext( + request, + sourceBuilder, + getStarTreeQueryContext( + searchContext, + starTreeFieldConfiguration, + "startree1", + -1, + List.of(new NumericDimension(NUMERIC_FIELD), new OrdinalDimension(KEYWORD_FIELD)), + List.of(new Metric(FIELD_NAME, List.of(MetricStat.SUM, MetricStat.MAX))), + baseQuery, + sourceBuilder, + true + ), + -1 + ); + + // Case 3: NumericTermsQuery and non-nested metric aggregations is nested within keyword term aggregation, should use star tree + termsAggregationBuilder = terms("term").field(KEYWORD_FIELD).subAggregation(maxAggNoSub); + baseQuery = new TermQueryBuilder(FIELD_NAME, 1); + sourceBuilder = new SearchSourceBuilder().size(0).query(baseQuery).aggregation(termsAggregationBuilder); + assertStarTreeContext( + request, + sourceBuilder, + getStarTreeQueryContext( + searchContext, + starTreeFieldConfiguration, + "startree1", + -1, + List.of(new NumericDimension(NUMERIC_FIELD), new OrdinalDimension(KEYWORD_FIELD), new NumericDimension(FIELD_NAME)), + List.of(new Metric(FIELD_NAME, List.of(MetricStat.SUM, MetricStat.MAX))), + baseQuery, + sourceBuilder, + true + ), + -1 + ); + + // Case 4: NumericTermsQuery and multiple non-nested metric aggregations is within numeric term aggregation, should use star tree + termsAggregationBuilder = terms("term").field(NUMERIC_FIELD).subAggregation(maxAggNoSub).subAggregation(sumAggNoSub); + sourceBuilder = new SearchSourceBuilder().size(0).query(new TermQueryBuilder(FIELD_NAME, 1)).aggregation(termsAggregationBuilder); + + assertStarTreeContext( + request, + sourceBuilder, + getStarTreeQueryContext( + searchContext, + starTreeFieldConfiguration, + "startree1", + -1, + List.of(new NumericDimension(NUMERIC_FIELD), new OrdinalDimension(KEYWORD_FIELD), new NumericDimension(FIELD_NAME)), + List.of(new Metric(FIELD_NAME, List.of(MetricStat.SUM, MetricStat.MAX))), + baseQuery, + sourceBuilder, + true + ), + -1 + ); + + // Case 5: Nested metric aggregations is nested within numeric term aggregation, should not use star tree + termsAggregationBuilder = terms("term").field(NUMERIC_FIELD).subAggregation(sumAggSub); + sourceBuilder = new SearchSourceBuilder().size(0).query(new TermQueryBuilder(FIELD_NAME, 1)).aggregation(termsAggregationBuilder); + assertStarTreeContext(request, sourceBuilder, null, -1); + + // Case 6: Unsupported aggregations is nested within numeric term aggregation, should not use star tree + termsAggregationBuilder = terms("term").field(NUMERIC_FIELD).subAggregation(medianAgg); + sourceBuilder = new SearchSourceBuilder().size(0).query(new TermQueryBuilder(FIELD_NAME, 1)).aggregation(termsAggregationBuilder); + assertStarTreeContext(request, sourceBuilder, null, -1); + + setStarTreeIndexSetting(null); + } + private void setStarTreeIndexSetting(String value) { client().admin() .cluster() diff --git a/server/src/test/java/org/opensearch/search/aggregations/startree/KeywordTermsAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/startree/KeywordTermsAggregatorTests.java new file mode 100644 index 0000000000000..9b1627e5b3fe5 --- /dev/null +++ b/server/src/test/java/org/opensearch/search/aggregations/startree/KeywordTermsAggregatorTests.java @@ -0,0 +1,239 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.aggregations.startree; + +import com.carrotsearch.randomizedtesting.RandomizedTest; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.lucene101.Lucene101Codec; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SegmentReader; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.NumericUtils; +import org.opensearch.common.lucene.Lucene; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; +import org.opensearch.index.codec.composite.CompositeIndexReader; +import org.opensearch.index.codec.composite.composite101.Composite101Codec; +import org.opensearch.index.codec.composite912.datacube.startree.StarTreeDocValuesFormatTests; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.NumericDimension; +import org.opensearch.index.compositeindex.datacube.OrdinalDimension; +import org.opensearch.index.mapper.KeywordFieldMapper; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.search.aggregations.Aggregator; +import org.opensearch.search.aggregations.AggregatorTestCase; +import org.opensearch.search.aggregations.bucket.terms.InternalTerms; +import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; +import org.opensearch.search.aggregations.support.ValuesSourceAggregationBuilder; +import org.junit.After; +import org.junit.Before; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Random; + +import static org.opensearch.search.aggregations.AggregationBuilders.avg; +import static org.opensearch.search.aggregations.AggregationBuilders.count; +import static org.opensearch.search.aggregations.AggregationBuilders.max; +import static org.opensearch.search.aggregations.AggregationBuilders.min; +import static org.opensearch.search.aggregations.AggregationBuilders.sum; +import static org.opensearch.search.aggregations.AggregationBuilders.terms; +import static org.opensearch.test.InternalAggregationTestCase.DEFAULT_MAX_BUCKETS; + +public class KeywordTermsAggregatorTests extends AggregatorTestCase { + final static String STATUS = "status"; + final static String SIZE = "size"; + final static String CLIENTIP = "clientip"; + private static final MappedFieldType STATUS_FIELD_TYPE = new NumberFieldMapper.NumberFieldType( + STATUS, + NumberFieldMapper.NumberType.LONG + ); + private static final MappedFieldType SIZE_FIELD_NAME = new NumberFieldMapper.NumberFieldType(SIZE, NumberFieldMapper.NumberType.FLOAT); + private static final MappedFieldType CLIENTIP_FIELD_NAME = new KeywordFieldMapper.KeywordFieldType(CLIENTIP); + + @Before + public void setup() { + FeatureFlags.initializeFeatureFlags(Settings.builder().put(FeatureFlags.STAR_TREE_INDEX, true).build()); + } + + @After + public void teardown() throws IOException { + FeatureFlags.initializeFeatureFlags(Settings.EMPTY); + } + + protected Codec getCodec() { + final Logger testLogger = LogManager.getLogger(KeywordTermsAggregatorTests.class); + MapperService mapperService; + try { + mapperService = StarTreeDocValuesFormatTests.createMapperService(NumericTermsAggregatorTests.getExpandedMapping(1, false)); + } catch (IOException e) { + throw new RuntimeException(e); + } + return new Composite101Codec(Lucene101Codec.Mode.BEST_SPEED, mapperService, testLogger); + } + + public void testStarTreeKeywordTerms() throws IOException { + Directory directory = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(null); + conf.setCodec(getCodec()); + conf.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf); + + Random random = RandomizedTest.getRandom(); + int totalDocs = 100; + + long val; + + List docs = new ArrayList<>(); + // Index 100 random documents + for (int i = 0; i < totalDocs; i++) { + Document doc = new Document(); + if (random.nextBoolean()) { + val = random.nextInt(10); // Random int between 0 and 9 for status + doc.add(new SortedNumericDocValuesField(STATUS, val)); + } + if (random.nextBoolean()) { + val = NumericUtils.doubleToSortableLong(random.nextInt(100) + 0.5f); + doc.add(new SortedNumericDocValuesField(SIZE, val)); + } + if (random.nextBoolean()) { + val = random.nextInt(10); // Random strings for int between 0 and 9 for clientip + doc.add(new SortedSetDocValuesField(CLIENTIP, new BytesRef(String.valueOf(val)))); + doc.add(new StringField(CLIENTIP, String.valueOf(val), Field.Store.NO)); + } + iw.addDocument(doc); + docs.add(doc); + } + + if (randomBoolean()) { + iw.forceMerge(1); + } + iw.close(); + DirectoryReader ir = DirectoryReader.open(directory); + LeafReaderContext context = ir.leaves().get(0); + + SegmentReader reader = Lucene.segmentReader(context.reader()); + IndexSearcher indexSearcher = newSearcher(wrapInMockESDirectoryReader(ir), false, false); + CompositeIndexReader starTreeDocValuesReader = (CompositeIndexReader) reader.getDocValuesReader(); + + List compositeIndexFields = starTreeDocValuesReader.getCompositeIndexFields(); + CompositeIndexFieldInfo starTree = compositeIndexFields.get(0); + + LinkedHashMap supportedDimensions = new LinkedHashMap<>(); + supportedDimensions.put(new NumericDimension(STATUS), STATUS_FIELD_TYPE); + supportedDimensions.put(new NumericDimension(SIZE), SIZE_FIELD_NAME); + supportedDimensions.put(new OrdinalDimension(CLIENTIP), CLIENTIP_FIELD_NAME); + + ValuesSourceAggregationBuilder[] aggBuilders = { + sum("_sum").field(SIZE), + max("_max").field(SIZE), + min("_min").field(SIZE), + count("_count").field(SIZE), + avg("_avg").field(SIZE) }; + + for (ValuesSourceAggregationBuilder aggregationBuilder : aggBuilders) { + Query query = new MatchAllDocsQuery(); + QueryBuilder queryBuilder = null; + + TermsAggregationBuilder termsAggregationBuilder = terms("terms_agg").field(CLIENTIP) + .subAggregation(aggregationBuilder) + .collectMode(Aggregator.SubAggCollectionMode.BREADTH_FIRST); + testCase(indexSearcher, query, queryBuilder, termsAggregationBuilder, starTree, supportedDimensions); + + // Numeric-terms query with keyword terms aggregation + for (int cases = 0; cases < 100; cases++) { + // query of status field + String queryField = STATUS; + long queryValue = random.nextInt(10); + query = SortedNumericDocValuesField.newSlowExactQuery(queryField, queryValue); + queryBuilder = new TermQueryBuilder(queryField, queryValue); + testCase(indexSearcher, query, queryBuilder, termsAggregationBuilder, starTree, supportedDimensions); + + // query on size field + queryField = SIZE; + queryValue = NumericUtils.floatToSortableInt(random.nextInt(20) - 14.5f); + query = SortedNumericDocValuesField.newSlowExactQuery(queryField, queryValue); + queryBuilder = new TermQueryBuilder(queryField, queryValue); + testCase(indexSearcher, query, queryBuilder, termsAggregationBuilder, starTree, supportedDimensions); + } + } + ir.close(); + directory.close(); + } + + private void testCase( + IndexSearcher indexSearcher, + Query query, + QueryBuilder queryBuilder, + TermsAggregationBuilder termsAggregationBuilder, + CompositeIndexFieldInfo starTree, + LinkedHashMap supportedDimensions + ) throws IOException { + InternalTerms starTreeAggregation = searchAndReduceStarTree( + createIndexSettings(), + indexSearcher, + query, + queryBuilder, + termsAggregationBuilder, + starTree, + supportedDimensions, + null, + DEFAULT_MAX_BUCKETS, + false, + null, + true, + STATUS_FIELD_TYPE, + SIZE_FIELD_NAME, + CLIENTIP_FIELD_NAME + ); + + InternalTerms defaultAggregation = searchAndReduceStarTree( + createIndexSettings(), + indexSearcher, + query, + queryBuilder, + termsAggregationBuilder, + null, + null, + null, + DEFAULT_MAX_BUCKETS, + false, + null, + false, + STATUS_FIELD_TYPE, + SIZE_FIELD_NAME, + CLIENTIP_FIELD_NAME + ); + + assertEquals(defaultAggregation.getBuckets().size(), starTreeAggregation.getBuckets().size()); + assertEquals(defaultAggregation.getBuckets(), starTreeAggregation.getBuckets()); + } +} diff --git a/server/src/test/java/org/opensearch/search/aggregations/startree/NumericTermsAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/startree/NumericTermsAggregatorTests.java new file mode 100644 index 0000000000000..a14d0a15aa31e --- /dev/null +++ b/server/src/test/java/org/opensearch/search/aggregations/startree/NumericTermsAggregatorTests.java @@ -0,0 +1,338 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.aggregations.startree; + +import com.carrotsearch.randomizedtesting.RandomizedTest; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.lucene101.Lucene101Codec; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SegmentReader; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.apache.lucene.util.NumericUtils; +import org.opensearch.common.lucene.Lucene; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; +import org.opensearch.index.codec.composite.CompositeIndexReader; +import org.opensearch.index.codec.composite.composite101.Composite101Codec; +import org.opensearch.index.codec.composite912.datacube.startree.StarTreeDocValuesFormatTests; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.NumericDimension; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.search.aggregations.AggregatorTestCase; +import org.opensearch.search.aggregations.bucket.terms.InternalTerms; +import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; +import org.opensearch.search.aggregations.support.ValuesSourceAggregationBuilder; +import org.junit.After; +import org.junit.Before; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Random; + +import static org.opensearch.index.codec.composite912.datacube.startree.AbstractStarTreeDVFormatTests.topMapping; +import static org.opensearch.search.aggregations.AggregationBuilders.avg; +import static org.opensearch.search.aggregations.AggregationBuilders.count; +import static org.opensearch.search.aggregations.AggregationBuilders.max; +import static org.opensearch.search.aggregations.AggregationBuilders.min; +import static org.opensearch.search.aggregations.AggregationBuilders.sum; +import static org.opensearch.search.aggregations.AggregationBuilders.terms; +import static org.opensearch.test.InternalAggregationTestCase.DEFAULT_MAX_BUCKETS; + +public class NumericTermsAggregatorTests extends AggregatorTestCase { + final static String STATUS = "status"; + final static String SIZE = "size"; + private static final MappedFieldType STATUS_FIELD_TYPE = new NumberFieldMapper.NumberFieldType( + STATUS, + NumberFieldMapper.NumberType.LONG + ); + private static final MappedFieldType SIZE_FIELD_NAME = new NumberFieldMapper.NumberFieldType(SIZE, NumberFieldMapper.NumberType.FLOAT); + + @Before + public void setup() { + FeatureFlags.initializeFeatureFlags(Settings.builder().put(FeatureFlags.STAR_TREE_INDEX, true).build()); + } + + @After + public void teardown() throws IOException { + FeatureFlags.initializeFeatureFlags(Settings.EMPTY); + } + + protected Codec getCodec() { + final Logger testLogger = LogManager.getLogger(NumericTermsAggregatorTests.class); + MapperService mapperService; + try { + mapperService = StarTreeDocValuesFormatTests.createMapperService(getExpandedMapping(1, false)); + } catch (IOException e) { + throw new RuntimeException(e); + } + return new Composite101Codec(Lucene101Codec.Mode.BEST_SPEED, mapperService, testLogger); + } + + public void testStarTreeNumericTerms() throws IOException { + Directory directory = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(null); + conf.setCodec(getCodec()); + conf.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf); + + Random random = RandomizedTest.getRandom(); + int totalDocs = 100; + + long val; + + List docs = new ArrayList<>(); + // Index 100 random documents + for (int i = 0; i < totalDocs; i++) { + Document doc = new Document(); + if (random.nextBoolean()) { + val = random.nextInt(10); // Random int between (0 and 9) for status + doc.add(new SortedNumericDocValuesField(STATUS, val)); + } + if (random.nextBoolean()) { + val = NumericUtils.doubleToSortableLong(random.nextInt(100) + 0.5f); + // Random float between (0 and 99)+0.5f for size + doc.add(new SortedNumericDocValuesField(SIZE, val)); + } + iw.addDocument(doc); + docs.add(doc); + } + + if (randomBoolean()) { + iw.forceMerge(1); + } + iw.close(); + DirectoryReader ir = DirectoryReader.open(directory); + LeafReaderContext context = ir.leaves().get(0); + + SegmentReader reader = Lucene.segmentReader(context.reader()); + IndexSearcher indexSearcher = newSearcher(reader, false, false); + CompositeIndexReader starTreeDocValuesReader = (CompositeIndexReader) reader.getDocValuesReader(); + + List compositeIndexFields = starTreeDocValuesReader.getCompositeIndexFields(); + CompositeIndexFieldInfo starTree = compositeIndexFields.get(0); + + LinkedHashMap supportedDimensions = new LinkedHashMap<>(); + supportedDimensions.put(new NumericDimension(STATUS), STATUS_FIELD_TYPE); + supportedDimensions.put(new NumericDimension(SIZE), SIZE_FIELD_NAME); + + ValuesSourceAggregationBuilder[] aggBuilders = { + sum("_sum").field(SIZE), + max("_max").field(SIZE), + min("_min").field(SIZE), + count("_count").field(SIZE), + avg("_avg").field(SIZE) }; + + for (ValuesSourceAggregationBuilder aggregationBuilder : aggBuilders) { + Query query = new MatchAllDocsQuery(); + QueryBuilder queryBuilder = null; + + TermsAggregationBuilder termsAggregationBuilder = terms("terms_agg").field(STATUS).subAggregation(aggregationBuilder); + testCase(indexSearcher, query, queryBuilder, termsAggregationBuilder, starTree, supportedDimensions); + + // Numeric-terms query with numeric terms aggregation + for (int cases = 0; cases < 100; cases++) { + + // query of status field + String queryField = STATUS; + long queryValue = random.nextInt(10); + query = SortedNumericDocValuesField.newSlowExactQuery(queryField, queryValue); + queryBuilder = new TermQueryBuilder(queryField, queryValue); + testCase(indexSearcher, query, queryBuilder, termsAggregationBuilder, starTree, supportedDimensions); + + // query on size field + queryField = SIZE; + queryValue = NumericUtils.floatToSortableInt(random.nextInt(20) - 14.5f); + query = SortedNumericDocValuesField.newSlowExactQuery(queryField, queryValue); + queryBuilder = new TermQueryBuilder(queryField, queryValue); + testCase(indexSearcher, query, queryBuilder, termsAggregationBuilder, starTree, supportedDimensions); + } + } + + aggBuilders = new ValuesSourceAggregationBuilder[] { + sum("_sum").field(STATUS), + max("_max").field(STATUS), + min("_min").field(STATUS), + count("_count").field(STATUS), + avg("_avg").field(STATUS) }; + + for (ValuesSourceAggregationBuilder aggregationBuilder : aggBuilders) { + Query query = new MatchAllDocsQuery(); + QueryBuilder queryBuilder = null; + + TermsAggregationBuilder termsAggregationBuilder = terms("terms_agg").field(SIZE).subAggregation(aggregationBuilder); + testCase(indexSearcher, query, queryBuilder, termsAggregationBuilder, starTree, supportedDimensions); + } + + ir.close(); + directory.close(); + } + + private void testCase( + IndexSearcher indexSearcher, + Query query, + QueryBuilder queryBuilder, + TermsAggregationBuilder termsAggregationBuilder, + CompositeIndexFieldInfo starTree, + LinkedHashMap supportedDimensions + ) throws IOException { + InternalTerms starTreeAggregation = searchAndReduceStarTree( + createIndexSettings(), + indexSearcher, + query, + queryBuilder, + termsAggregationBuilder, + starTree, + supportedDimensions, + null, + DEFAULT_MAX_BUCKETS, + false, + null, + true, + STATUS_FIELD_TYPE, + SIZE_FIELD_NAME + ); + + InternalTerms defaultAggregation = searchAndReduceStarTree( + createIndexSettings(), + indexSearcher, + query, + queryBuilder, + termsAggregationBuilder, + null, + null, + null, + DEFAULT_MAX_BUCKETS, + false, + null, + false, + STATUS_FIELD_TYPE, + SIZE_FIELD_NAME + ); + + assertEquals(defaultAggregation.getBuckets().size(), starTreeAggregation.getBuckets().size()); + assertEquals(defaultAggregation.getBuckets(), starTreeAggregation.getBuckets()); + } + + public static XContentBuilder getExpandedMapping(int maxLeafDocs, boolean skipStarNodeCreationForStatusDimension) throws IOException { + return topMapping(b -> { + b.startObject("composite"); + b.startObject("startree1"); // Use the same name as the provided mapping + b.field("type", "star_tree"); + b.startObject("config"); + b.field("max_leaf_docs", maxLeafDocs); + if (skipStarNodeCreationForStatusDimension) { + b.startArray("skip_star_node_creation_for_dimensions"); + b.value("status"); // Skip for "status" dimension + b.endArray(); + } + b.startArray("ordered_dimensions"); + b.startObject(); + b.field("name", "status"); + b.endObject(); + b.startObject(); + b.field("name", "size"); + b.endObject(); + b.startObject(); + b.field("name", "clientip"); + b.endObject(); + b.startObject(); + b.field("name", "@timestamp"); + b.startArray("calendar_intervals"); + b.value("month"); + b.value("day"); + b.endArray(); + b.endObject(); + b.endArray(); + b.startArray("metrics"); + b.startObject(); + b.field("name", "size"); + b.startArray("stats"); + b.value("sum"); + b.value("value_count"); + b.value("min"); + b.value("max"); + b.endArray(); + b.endObject(); + b.startObject(); + b.field("name", "status"); + b.startArray("stats"); + b.value("sum"); + b.value("value_count"); + b.value("min"); + b.value("max"); + b.endArray(); + b.endObject(); + b.endArray(); + b.endObject(); + b.endObject(); + b.endObject(); + b.startObject("properties"); + b.startObject("@timestamp"); + b.field("type", "date"); + b.field("format", "strict_date_optional_time||epoch_second"); + b.endObject(); + b.startObject("message"); + b.field("type", "keyword"); + b.field("index", false); + b.field("doc_values", false); + b.endObject(); + b.startObject("clientip"); + b.field("type", "keyword"); + b.endObject(); + b.startObject("request"); + b.field("type", "text"); + b.startObject("fields"); + b.startObject("raw"); + b.field("type", "keyword"); + b.field("ignore_above", 256); + b.endObject(); + b.endObject(); + b.endObject(); + b.startObject("status"); + b.field("type", "integer"); + b.endObject(); + b.startObject("size"); + b.field("type", "float"); + b.endObject(); + b.startObject("geoip"); + b.startObject("properties"); + b.startObject("country_name"); + b.field("type", "keyword"); + b.endObject(); + b.startObject("city_name"); + b.field("type", "keyword"); + b.endObject(); + b.startObject("location"); + b.field("type", "geo_point"); + b.endObject(); + b.endObject(); + b.endObject(); + b.endObject(); + }); + } +} diff --git a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java index 78e3d4f50a0d5..2f6bb11d59ca6 100644 --- a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java @@ -168,6 +168,7 @@ import java.util.function.Function; import java.util.function.Supplier; import java.util.stream.Collectors; +import java.util.stream.Stream; import static java.util.Collections.emptyMap; import static java.util.Collections.singletonList; @@ -441,6 +442,8 @@ protected SearchContext createSearchContextWithStarTreeContext( searchContext.getQueryShardContext().setStarTreeQueryContext(starTreeQueryContext); } + Stream.of(fieldTypes).forEach(fieldType -> when(mapperService.fieldType(fieldType.name())).thenReturn(fieldType)); + return searchContext; }