-
Notifications
You must be signed in to change notification settings - Fork 1.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Star Tree] [Search] Keyword & Numeric Terms Aggregation #17165
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -40,9 +40,11 @@ | |
import org.apache.lucene.index.SortedSetDocValues; | ||
import org.apache.lucene.index.Terms; | ||
import org.apache.lucene.index.TermsEnum; | ||
import org.apache.lucene.search.DocIdSetIterator; | ||
import org.apache.lucene.search.Weight; | ||
import org.apache.lucene.util.ArrayUtil; | ||
import org.apache.lucene.util.BytesRef; | ||
import org.apache.lucene.util.FixedBitSet; | ||
import org.apache.lucene.util.PriorityQueue; | ||
import org.opensearch.common.SetOnce; | ||
import org.opensearch.common.lease.Releasable; | ||
|
@@ -51,6 +53,12 @@ | |
import org.opensearch.common.util.LongHash; | ||
import org.opensearch.core.common.io.stream.StreamOutput; | ||
import org.opensearch.core.xcontent.XContentBuilder; | ||
import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; | ||
import org.opensearch.index.compositeindex.datacube.MetricStat; | ||
import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; | ||
import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils; | ||
import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.SortedNumericStarTreeValuesIterator; | ||
import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.SortedSetStarTreeValuesIterator; | ||
import org.opensearch.index.mapper.DocCountFieldMapper; | ||
import org.opensearch.search.DocValueFormat; | ||
import org.opensearch.search.aggregations.AggregationExecutionException; | ||
|
@@ -63,14 +71,20 @@ | |
import org.opensearch.search.aggregations.InternalOrder; | ||
import org.opensearch.search.aggregations.LeafBucketCollector; | ||
import org.opensearch.search.aggregations.LeafBucketCollectorBase; | ||
import org.opensearch.search.aggregations.StarTreeBucketCollector; | ||
import org.opensearch.search.aggregations.StarTreePreComputeCollector; | ||
import org.opensearch.search.aggregations.bucket.LocalBucketCountThresholds; | ||
import org.opensearch.search.aggregations.bucket.terms.SignificanceLookup.BackgroundFrequencyForBytes; | ||
import org.opensearch.search.aggregations.bucket.terms.heuristic.SignificanceHeuristic; | ||
import org.opensearch.search.aggregations.support.ValuesSource; | ||
import org.opensearch.search.internal.SearchContext; | ||
import org.opensearch.search.startree.StarTreeQueryHelper; | ||
import org.opensearch.search.startree.StarTreeTraversalUtil; | ||
import org.opensearch.search.startree.filter.DimensionFilter; | ||
|
||
import java.io.IOException; | ||
import java.util.Arrays; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.function.BiConsumer; | ||
import java.util.function.Function; | ||
|
@@ -85,7 +99,7 @@ | |
* | ||
* @opensearch.internal | ||
*/ | ||
public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggregator { | ||
public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggregator implements StarTreePreComputeCollector { | ||
protected final ResultStrategy<?, ?, ?> resultStrategy; | ||
protected final ValuesSource.Bytes.WithOrdinals valuesSource; | ||
|
||
|
@@ -97,6 +111,7 @@ | |
private final SetOnce<SortedSetDocValues> dvs = new SetOnce<>(); | ||
protected int segmentsWithSingleValuedOrds = 0; | ||
protected int segmentsWithMultiValuedOrds = 0; | ||
LongUnaryOperator globalOperator; | ||
|
||
/** | ||
* Lookup global ordinals | ||
|
@@ -228,6 +243,10 @@ | |
(ord, docCount) -> incrementBucketDocCount(collectionStrategy.globalOrdToBucketOrd(0, ord), docCount) | ||
); | ||
} | ||
CompositeIndexFieldInfo supportedStarTree = StarTreeQueryHelper.getSupportedStarTree(this.context.getQueryShardContext()); | ||
if (supportedStarTree != null) { | ||
return preComputeWithStarTree(ctx, supportedStarTree); | ||
} | ||
return false; | ||
} | ||
|
||
|
@@ -307,6 +326,88 @@ | |
}); | ||
} | ||
|
||
/**
 * Builds the star-tree bucket collector for this terms aggregation on a single leaf.
 *
 * Loads the star-tree values for {@code ctx} and passes the collector the result of
 * {@code StarTreeTraversalUtil.getStarTreeResult}, computed from the base query's star-tree filter
 * with {@code DimensionFilter.MATCH_ALL_DEFAULT} merged in for {@code fieldName} via
 * {@code mergeDimensionFilterIfNotExists}.
 *
 * @param ctx      leaf whose star-tree values are read
 * @param starTree star-tree field used for the values lookup and the doc-count metric name
 * @param parent   must be {@code null}; this is checked only by an {@code assert}
 * @return an anonymous {@code StarTreeBucketCollector} that collects one bucket per term ordinal of each entry
 * @throws IOException declared by the signature and by both overridden collector methods
 */
public StarTreeBucketCollector getStarTreeBucketCollector( | ||
LeafReaderContext ctx, | ||
CompositeIndexFieldInfo starTree, | ||
StarTreeBucketCollector parent | ||
) throws IOException { | ||
// NOTE(review): with assertions disabled a non-null parent is silently ignored — confirm this is intended.
assert parent == null; | ||
StarTreeValues starTreeValues = StarTreeQueryHelper.getStarTreeValues(ctx, starTree); | ||
return new StarTreeBucketCollector( | ||
starTreeValues, | ||
StarTreeTraversalUtil.getStarTreeResult( | ||
starTreeValues, | ||
StarTreeQueryHelper.mergeDimensionFilterIfNotExists( | ||
context.getQueryShardContext().getStarTreeQueryContext().getBaseQueryStarTreeFilter(), | ||
fieldName, | ||
List.of(DimensionFilter.MATCH_ALL_DEFAULT) | ||
), | ||
context | ||
) | ||
) { | ||
// Registers one child collector per sub-aggregator, passing this collector as the child's parent.
// NOTE(review): the cast is unchecked; presumably only star-tree capable sub-aggregators reach this point — confirm.
@Override | ||
public void setSubCollectors() throws IOException { | ||
for (Aggregator aggregator : subAggregators) { | ||
this.subCollectors.add(((StarTreePreComputeCollector) aggregator).getStarTreeBucketCollector(ctx, starTree, this)); | ||
} | ||
} | ||
Comment on lines
+349
to
+353
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you move this below the fields defined below? Fields within an anonymous class are kind of confusing, but when they are interspersed with methods, it hurts my brain. Actually, if you move the fields out of the anonymous class declaration and into the |
||
|
||
// Iterator over the term ordinals stored for fieldName in the star tree.
SortedSetStarTreeValuesIterator valuesIterator = (SortedSetStarTreeValuesIterator) starTreeValues.getDimensionValuesIterator( | ||
fieldName | ||
); | ||
|
||
// Name of the star-tree metric doc-values field for the "_doc_count" field and the DOC_COUNT stat.
String metricName = StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues( | ||
starTree.getField(), | ||
"_doc_count", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this be hard-coded? Should it be |
||
MetricStat.DOC_COUNT.getTypeName() | ||
); | ||
// Iterator over that metric; its value is passed on as the bucket's doc count below.
SortedNumericStarTreeValuesIterator docCountsIterator = (SortedNumericStarTreeValuesIterator) starTreeValues | ||
.getMetricValuesIterator(metricName); | ||
|
||
// Collects one star-tree entry: each ordinal of the entry is mapped to a bucket and the entry's doc count is collected into it.
// NOTE(review): owningBucketOrd is not read here; 0 is passed to globalOrdToBucketOrd instead.
@Override | ||
public void collectStarTreeEntry(int starTreeEntry, long owningBucketOrd) throws IOException { | ||
|
||
// Nothing to collect when the entry has no value for this dimension.
if (valuesIterator.advanceExact(starTreeEntry) == false) { | ||
return; | ||
Check warning on line 371 in server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java
|
||
} | ||
|
||
for (int i = 0, count = valuesIterator.docValueCount(); i < count; i++) { | ||
long dimensionValue = valuesIterator.nextOrd(); | ||
// globalOperator is a field assigned in preComputeWithStarTree; it is null if that method has not run for this leaf.
long ord = globalOperator.applyAsLong(dimensionValue); | ||
|
||
// Entries without a doc-count value are skipped; only the first metric value is read.
if (docCountsIterator.advanceExact(starTreeEntry)) { | ||
long metricValue = docCountsIterator.nextValue(); | ||
|
||
long bucketOrd = collectionStrategy.globalOrdToBucketOrd(0, ord); | ||
// presumably a negative result encodes an already-known bucket as -1 - ord — confirm against globalOrdToBucketOrd
if (bucketOrd < 0) { | ||
bucketOrd = -1 - bucketOrd; | ||
collectStarTreeBucket(this, metricValue, bucketOrd, starTreeEntry); | ||
Check warning on line 384 in server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java
|
||
} else { | ||
// Non-negative ordinal: grow(bucketOrd + 1) is called before collecting into it.
grow(bucketOrd + 1); | ||
collectStarTreeBucket(this, metricValue, bucketOrd, starTreeEntry); | ||
} | ||
} | ||
} | ||
} | ||
}; | ||
} | ||
|
||
/**
 * Pre-computes this aggregation for one leaf from the star tree.
 *
 * Assigns {@code globalOperator} from {@code valuesSource.globalOrdinalsMapping(ctx)}, builds the
 * star-tree bucket collector, and feeds it every set bit of the collector's matching-docs bit set,
 * always with owning bucket ordinal 0.
 *
 * @param ctx               leaf to pre-compute
 * @param supportedStarTree star-tree field to read from
 * @return always {@code true}
 * @throws IOException declared by the signature
 */
private boolean preComputeWithStarTree(LeafReaderContext ctx, CompositeIndexFieldInfo supportedStarTree) throws IOException { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should probably return |
||
// Must be assigned before collection: collectStarTreeEntry reads globalOperator.
globalOperator = valuesSource.globalOrdinalsMapping(ctx); | ||
StarTreeBucketCollector starTreeBucketCollector = getStarTreeBucketCollector(ctx, supportedStarTree, null); | ||
FixedBitSet matchingDocsBitSet = starTreeBucketCollector.getMatchingDocsBitSet(); | ||
|
||
int numBits = matchingDocsBitSet.length(); | ||
if (numBits > 0) { | ||
// Visit every set bit; the (bit + 1 < numBits) guard keeps the index passed to nextSetBit below length().
for (int bit = matchingDocsBitSet.nextSetBit(0); bit != DocIdSetIterator.NO_MORE_DOCS; bit = (bit + 1 < numBits) | ||
? matchingDocsBitSet.nextSetBit(bit + 1) | ||
: DocIdSetIterator.NO_MORE_DOCS) { | ||
// Each set bit is passed as a star-tree entry id.
starTreeBucketCollector.collectStarTreeEntry(bit, 0); | ||
} | ||
} | ||
return true; | ||
} | ||
|
||
@Override | ||
public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { | ||
return resultStrategy.buildAggregations(owningBucketOrds); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,14 +34,22 @@ | |
import org.apache.lucene.index.IndexReader; | ||
import org.apache.lucene.index.LeafReaderContext; | ||
import org.apache.lucene.index.SortedNumericDocValues; | ||
import org.apache.lucene.search.DocIdSetIterator; | ||
import org.apache.lucene.search.ScoreMode; | ||
import org.apache.lucene.util.FixedBitSet; | ||
import org.apache.lucene.util.NumericUtils; | ||
import org.apache.lucene.util.PriorityQueue; | ||
import org.opensearch.common.Numbers; | ||
import org.opensearch.common.lease.Releasable; | ||
import org.opensearch.common.lease.Releasables; | ||
import org.opensearch.common.util.LongArray; | ||
import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; | ||
import org.opensearch.index.compositeindex.datacube.MetricStat; | ||
import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; | ||
import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils; | ||
import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.SortedNumericStarTreeValuesIterator; | ||
import org.opensearch.index.fielddata.FieldData; | ||
import org.opensearch.index.mapper.NumberFieldMapper; | ||
import org.opensearch.search.DocValueFormat; | ||
import org.opensearch.search.aggregations.Aggregator; | ||
import org.opensearch.search.aggregations.AggregatorFactories; | ||
|
@@ -52,6 +60,8 @@ | |
import org.opensearch.search.aggregations.InternalOrder; | ||
import org.opensearch.search.aggregations.LeafBucketCollector; | ||
import org.opensearch.search.aggregations.LeafBucketCollectorBase; | ||
import org.opensearch.search.aggregations.StarTreeBucketCollector; | ||
import org.opensearch.search.aggregations.StarTreePreComputeCollector; | ||
import org.opensearch.search.aggregations.bucket.LocalBucketCountThresholds; | ||
import org.opensearch.search.aggregations.bucket.terms.IncludeExclude.LongFilter; | ||
import org.opensearch.search.aggregations.bucket.terms.LongKeyedBucketOrds.BucketOrdsEnum; | ||
|
@@ -60,6 +70,9 @@ | |
import org.opensearch.search.aggregations.support.ValuesSource; | ||
import org.opensearch.search.internal.ContextIndexSearcher; | ||
import org.opensearch.search.internal.SearchContext; | ||
import org.opensearch.search.startree.StarTreeQueryHelper; | ||
import org.opensearch.search.startree.StarTreeTraversalUtil; | ||
import org.opensearch.search.startree.filter.DimensionFilter; | ||
|
||
import java.io.IOException; | ||
import java.math.BigInteger; | ||
|
@@ -78,11 +91,12 @@ | |
* | ||
* @opensearch.internal | ||
*/ | ||
public class NumericTermsAggregator extends TermsAggregator { | ||
public class NumericTermsAggregator extends TermsAggregator implements StarTreePreComputeCollector { | ||
private final ResultStrategy<?, ?> resultStrategy; | ||
private final ValuesSource.Numeric valuesSource; | ||
private final LongKeyedBucketOrds bucketOrds; | ||
private final LongFilter longFilter; | ||
private final String fieldName; | ||
|
||
public NumericTermsAggregator( | ||
String name, | ||
|
@@ -104,6 +118,9 @@ | |
this.valuesSource = valuesSource; | ||
this.longFilter = longFilter; | ||
bucketOrds = LongKeyedBucketOrds.build(context.bigArrays(), cardinality); | ||
this.fieldName = (this.valuesSource instanceof ValuesSource.Numeric.FieldData) | ||
? ((ValuesSource.Numeric.FieldData) valuesSource).getIndexFieldName() | ||
: null; | ||
} | ||
|
||
@Override | ||
|
@@ -145,6 +162,101 @@ | |
}); | ||
} | ||
|
||
/**
 * Tries to pre-compute this aggregation for one leaf using a star tree.
 *
 * @param ctx leaf to pre-compute
 * @return the result of {@code preComputeWithStarTree} when {@code StarTreeQueryHelper.getSupportedStarTree}
 *         returns a non-null star tree for the query shard context; {@code false} otherwise
 * @throws IOException declared by the signature
 */
protected boolean tryPrecomputeAggregationForLeaf(LeafReaderContext ctx) throws IOException { | ||
CompositeIndexFieldInfo supportedStarTree = StarTreeQueryHelper.getSupportedStarTree(this.context.getQueryShardContext()); | ||
if (supportedStarTree != null) { | ||
return preComputeWithStarTree(ctx, supportedStarTree); | ||
} | ||
// No supported star tree was returned for this query shard context.
return false; | ||
} | ||
|
||
/**
 * Builds the star-tree bucket collector for this numeric terms aggregation on a single leaf.
 *
 * Loads the star-tree values for {@code ctx} and passes the collector the result of
 * {@code StarTreeTraversalUtil.getStarTreeResult}, computed from the base query's star-tree filter
 * with {@code DimensionFilter.MATCH_ALL_DEFAULT} merged in for {@code fieldName} via
 * {@code mergeDimensionFilterIfNotExists}.
 *
 * @param ctx      leaf whose star-tree values are read
 * @param starTree star-tree field used for the values lookup and the doc-count metric name
 * @param parent   must be {@code null}; this is checked only by an {@code assert}
 * @return an anonymous {@code StarTreeBucketCollector} that adds each entry's dimension value to {@code bucketOrds}
 * @throws IOException declared by the signature and by both overridden collector methods
 */
public StarTreeBucketCollector getStarTreeBucketCollector( | ||
LeafReaderContext ctx, | ||
CompositeIndexFieldInfo starTree, | ||
StarTreeBucketCollector parent | ||
) throws IOException { | ||
// NOTE(review): with assertions disabled a non-null parent is silently ignored — confirm this is intended.
assert parent == null; | ||
StarTreeValues starTreeValues = StarTreeQueryHelper.getStarTreeValues(ctx, starTree); | ||
return new StarTreeBucketCollector( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is pretty similar to the one in |
||
starTreeValues, | ||
StarTreeTraversalUtil.getStarTreeResult( | ||
starTreeValues, | ||
StarTreeQueryHelper.mergeDimensionFilterIfNotExists( | ||
context.getQueryShardContext().getStarTreeQueryContext().getBaseQueryStarTreeFilter(), | ||
// NOTE(review): the constructor sets fieldName to null when the values source is not a ValuesSource.Numeric.FieldData — confirm that case never reaches here.
fieldName, | ||
List.of(DimensionFilter.MATCH_ALL_DEFAULT) | ||
), | ||
context | ||
) | ||
) { | ||
// Registers one child collector per sub-aggregator, passing this collector as the child's parent.
// NOTE(review): the cast is unchecked; presumably only star-tree capable sub-aggregators reach this point — confirm.
@Override | ||
public void setSubCollectors() throws IOException { | ||
for (Aggregator aggregator : subAggregators) { | ||
this.subCollectors.add(((StarTreePreComputeCollector) aggregator).getStarTreeBucketCollector(ctx, starTree, this)); | ||
} | ||
} | ||
|
||
// Iterator over the numeric values stored for fieldName in the star tree.
SortedNumericStarTreeValuesIterator valuesIterator = (SortedNumericStarTreeValuesIterator) starTreeValues | ||
.getDimensionValuesIterator(fieldName); | ||
|
||
// Name of the star-tree metric doc-values field for the "_doc_count" field and the DOC_COUNT stat.
String metricName = StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues( | ||
starTree.getField(), | ||
"_doc_count", | ||
MetricStat.DOC_COUNT.getTypeName() | ||
); | ||
// Iterator over that metric; its value is passed on as the bucket's doc count below.
SortedNumericStarTreeValuesIterator docCountsIterator = (SortedNumericStarTreeValuesIterator) starTreeValues | ||
.getMetricValuesIterator(metricName); | ||
|
||
// Collects one star-tree entry: its dimension value is added to bucketOrds under owningBucketOrd and the entry's doc count is collected.
@Override | ||
public void collectStarTreeEntry(int starTreeEntry, long owningBucketOrd) throws IOException { | ||
// Nothing to collect when the entry has no value for this dimension.
if (valuesIterator.advanceExact(starTreeEntry) == false) { | ||
return; | ||
Check warning on line 213 in server/src/main/java/org/opensearch/search/aggregations/bucket/terms/NumericTermsAggregator.java
|
||
} | ||
// NOTE(review): the value is read once here, but the loop below runs entryValueCount() times with this same value — confirm entries are single-valued.
long dimensionValue = valuesIterator.nextValue(); | ||
// Only numeric & floating points are supported as of now in star-tree | ||
// TODO: Add support for isBigInteger() when it gets supported in star-tree | ||
if (valuesSource.isFloatingPoint()) { | ||
// Convert the stored long through the field type's toDoubleValue, then re-encode it as a sortable long.
double doubleValue = ((NumberFieldMapper.NumberFieldType) context.mapperService().fieldType(fieldName)).toDoubleValue( | ||
dimensionValue | ||
); | ||
dimensionValue = NumericUtils.doubleToSortableLong(doubleValue); | ||
sandeshkr419 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
for (int i = 0, count = valuesIterator.entryValueCount(); i < count; i++) { | ||
|
||
// Entries without a doc-count value are skipped; only the first metric value is read.
if (docCountsIterator.advanceExact(starTreeEntry)) { | ||
long metricValue = docCountsIterator.nextValue(); | ||
long bucketOrd = bucketOrds.add(owningBucketOrd, dimensionValue); | ||
|
||
// presumably a negative result encodes an already-present key as -1 - ord — confirm against LongKeyedBucketOrds.add
if (bucketOrd < 0) { | ||
bucketOrd = -1 - bucketOrd; | ||
collectStarTreeBucket(this, metricValue, bucketOrd, starTreeEntry); | ||
} else { | ||
// Non-negative ordinal: grow(bucketOrd + 1) is called before collecting into it.
grow(bucketOrd + 1); | ||
collectStarTreeBucket(this, metricValue, bucketOrd, starTreeEntry); | ||
} | ||
|
||
} | ||
} | ||
} | ||
}; | ||
} | ||
|
||
/**
 * Pre-computes this aggregation for one leaf from the star tree.
 *
 * Builds the star-tree bucket collector and feeds it every set bit of the collector's
 * matching-docs bit set, always with owning bucket ordinal 0.
 *
 * @param ctx               leaf to pre-compute
 * @param supportedStarTree star-tree field to read from
 * @return always {@code true}
 * @throws IOException declared by the signature
 */
private boolean preComputeWithStarTree(LeafReaderContext ctx, CompositeIndexFieldInfo supportedStarTree) throws IOException { | ||
StarTreeBucketCollector starTreeBucketCollector = getStarTreeBucketCollector(ctx, supportedStarTree, null); | ||
FixedBitSet matchingDocsBitSet = starTreeBucketCollector.getMatchingDocsBitSet(); | ||
|
||
int numBits = matchingDocsBitSet.length(); | ||
if (numBits > 0) { | ||
// Visit every set bit; the (bit + 1 < numBits) guard keeps the index passed to nextSetBit below length().
for (int bit = matchingDocsBitSet.nextSetBit(0); bit != DocIdSetIterator.NO_MORE_DOCS; bit = (bit + 1 < numBits) | ||
? matchingDocsBitSet.nextSetBit(bit + 1) | ||
: DocIdSetIterator.NO_MORE_DOCS) { | ||
// Each set bit is passed as a star-tree entry id.
starTreeBucketCollector.collectStarTreeEntry(bit, 0); | ||
} | ||
} | ||
return true; | ||
} | ||
|
||
@Override | ||
public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { | ||
return resultStrategy.buildAggregations(owningBucketOrds); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -291,6 +291,10 @@ private boolean subAggsNeedScore() { | |
|
||
/**
 * Decides whether collection of the given sub-aggregator is deferred.
 *
 * @param aggregator sub-aggregator being considered
 * @return {@code true} only when the collect mode is BREADTH_FIRST and the aggregator is not used for sorting;
 *         the replacement statement additionally requires that no star-tree query context is set
 */
@Override | ||
protected boolean shouldDefer(Aggregator aggregator) { | ||
// NOTE(review): this page shows both sides of the diff hunk (-291,6 +291,10); the return directly below is the pre-change line and the return after the comments is its replacement.
return collectMode == SubAggCollectionMode.BREADTH_FIRST && !aggsUsedForSorting.contains(aggregator); | ||
// Defer only in BREADTH_FIRST mode, only when no star-tree query context is set, and never for aggregators used for sorting. | ||
// The first clause reduces to "star-tree query context == null", because the second clause already requires BREADTH_FIRST. | ||
return (context.getQueryShardContext().getStarTreeQueryContext() == null || collectMode != SubAggCollectionMode.BREADTH_FIRST) | ||
&& collectMode == SubAggCollectionMode.BREADTH_FIRST | ||
&& !aggsUsedForSorting.contains(aggregator); | ||
Comment on lines
+294
to
+298
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could this be:
That is, make it even more complicated for readability. 😁 |
||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`preComputeWithStarTree` always returns `true`.