forked from hibernate/hibernate-search
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
HSEARCH-5133 Implement Lucene count distinct aggregations
- Loading branch information
Showing
6 changed files
with
273 additions
and
0 deletions.
There are no files selected for viewing
37 changes: 37 additions & 0 deletions
37
...rg/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinct.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* Copyright Red Hat Inc. and Hibernate Authors | ||
*/ | ||
package org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl; | ||
|
||
import java.util.BitSet; | ||
import java.util.HashSet; | ||
|
||
/**
 * Accumulates {@code long} values and reports how many distinct values were seen.
 * <p>
 * Values in the range {@code [0, 1024)} are tracked as bits of a {@link BitSet};
 * every other value (including negative ones) is tracked in a {@link HashSet}.
 * A given value always lands in the same structure, so the two never overlap
 * and their sizes can simply be added up.
 * <p>
 * The algorithm to collect distinct elements is inspired by {@code org.apache.lucene.facet.LongValueFacetCounts}
 * of <a href="https://lucene.apache.org/">Apache Lucene project</a>.
 */
public class CountDistinct {

	/**
	 * Exclusive upper bound of the values stored in the bit set.
	 * Kept as an explicit constant rather than read from {@code BitSet.size()},
	 * which reports allocated space and is implementation-dependent.
	 */
	private static final int DENSE_LIMIT = 1024;

	private final BitSet counts = new BitSet( DENSE_LIMIT );
	private final HashSet<Long> hashCounts = new HashSet<>();

	/**
	 * Records one occurrence of {@code value}; recording the same value again has no effect.
	 *
	 * @param value the value to record
	 */
	public void increment(long value) {
		if ( value >= 0 && value < DENSE_LIMIT ) {
			// Safe narrowing: the value is known to be in [0, DENSE_LIMIT).
			counts.set( (int) value );
		}
		else {
			hashCounts.add( value );
		}
	}

	/**
	 * @return the number of distinct values recorded so far
	 */
	public long result() {
		// Widen before adding so that the sum of the two int sizes cannot wrap around.
		return (long) counts.cardinality() + hashCounts.size();
	}

	/**
	 * Adds every value recorded by {@code other} to this instance; {@code other} is left unchanged.
	 *
	 * @param other the counter whose values are merged into this one
	 */
	public void merge(CountDistinct other) {
		counts.or( other.counts );
		hashCounts.addAll( other.hashCounts );
	}
}
67 changes: 67 additions & 0 deletions
67
...ate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctCollector.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* Copyright Red Hat Inc. and Hibernate Authors | ||
*/ | ||
package org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl; | ||
|
||
import java.io.IOException; | ||
|
||
import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValues; | ||
import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; | ||
|
||
import org.apache.lucene.index.LeafReaderContext; | ||
import org.apache.lucene.search.Collector; | ||
import org.apache.lucene.search.LeafCollector; | ||
import org.apache.lucene.search.Scorable; | ||
import org.apache.lucene.search.ScoreMode; | ||
|
||
public class CountDistinctCollector implements Collector { | ||
|
||
private final LongMultiValuesSource valueSource; | ||
private final CountDistinct counter = new CountDistinct(); | ||
|
||
public CountDistinctCollector(LongMultiValuesSource valueSource) { | ||
this.valueSource = valueSource; | ||
} | ||
|
||
public long count() { | ||
return counter.result(); | ||
} | ||
|
||
public void merge(CountDistinctCollector collector) { | ||
counter.merge( collector.counter ); | ||
} | ||
|
||
@Override | ||
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { | ||
return new CountDistinctLeafCollector( valueSource.getValues( context ) ); | ||
} | ||
|
||
@Override | ||
public ScoreMode scoreMode() { | ||
return ScoreMode.COMPLETE_NO_SCORES; | ||
} | ||
|
||
public class CountDistinctLeafCollector implements LeafCollector { | ||
private final LongMultiValues values; | ||
|
||
public CountDistinctLeafCollector(LongMultiValues values) { | ||
this.values = values; | ||
} | ||
|
||
@Override | ||
public void collect(int doc) throws IOException { | ||
if ( values.advanceExact( doc ) ) { | ||
while ( values.hasNextValue() ) { | ||
long value = values.nextValue(); | ||
counter.increment( value ); | ||
} | ||
} | ||
} | ||
|
||
@Override | ||
public void setScorer(Scorable scorer) { | ||
// no-op by default | ||
} | ||
} | ||
} |
32 changes: 32 additions & 0 deletions
32
...rch/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctCollectorFactory.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* Copyright Red Hat Inc. and Hibernate Authors | ||
*/ | ||
package org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl; | ||
|
||
import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorExecutionContext; | ||
import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorFactory; | ||
import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; | ||
import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; | ||
|
||
public class CountDistinctCollectorFactory | ||
implements CollectorFactory<CountDistinctCollector, Long, CountDistinctCollectorManager> { | ||
|
||
private final JoiningLongMultiValuesSource source; | ||
private final CollectorKey<CountDistinctCollector, Long> key; | ||
|
||
public CountDistinctCollectorFactory(JoiningLongMultiValuesSource source, CollectorKey<CountDistinctCollector, Long> key) { | ||
this.source = source; | ||
this.key = key; | ||
} | ||
|
||
@Override | ||
public CountDistinctCollectorManager createCollectorManager(CollectorExecutionContext context) { | ||
return new CountDistinctCollectorManager( source ); | ||
} | ||
|
||
@Override | ||
public CollectorKey<CountDistinctCollector, Long> getCollectorKey() { | ||
return key; | ||
} | ||
} |
41 changes: 41 additions & 0 deletions
41
...rch/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctCollectorManager.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* Copyright Red Hat Inc. and Hibernate Authors | ||
*/ | ||
package org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl; | ||
|
||
import java.io.IOException; | ||
import java.util.Collection; | ||
import java.util.LinkedList; | ||
|
||
import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; | ||
|
||
import org.apache.lucene.search.CollectorManager; | ||
|
||
public class CountDistinctCollectorManager implements CollectorManager<CountDistinctCollector, Long> { | ||
|
||
private final JoiningLongMultiValuesSource source; | ||
|
||
public CountDistinctCollectorManager(JoiningLongMultiValuesSource source) { | ||
this.source = source; | ||
} | ||
|
||
@Override | ||
public CountDistinctCollector newCollector() { | ||
return new CountDistinctCollector( source ); | ||
} | ||
|
||
@Override | ||
public Long reduce(Collection<CountDistinctCollector> collectors) throws IOException { | ||
if ( collectors.isEmpty() ) { | ||
return 0L; | ||
} | ||
|
||
LinkedList<CountDistinctCollector> distinctCollectors = new LinkedList<>( collectors ); | ||
CountDistinctCollector collector = distinctCollectors.removeLast(); | ||
for ( CountDistinctCollector other : distinctCollectors ) { | ||
collector.merge( other ); | ||
} | ||
return collector.count(); | ||
} | ||
} |
93 changes: 93 additions & 0 deletions
93
...nate/search/backend/lucene/types/aggregation/impl/LuceneMetricNumericLongAggregation.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* Copyright Red Hat Inc. and Hibernate Authors | ||
*/ | ||
package org.hibernate.search.backend.lucene.types.aggregation.impl; | ||
|
||
import java.util.Set; | ||
|
||
import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountDistinctCollector; | ||
import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountDistinctCollectorFactory; | ||
import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; | ||
import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; | ||
import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; | ||
import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; | ||
import org.hibernate.search.backend.lucene.search.common.impl.AbstractLuceneCodecAwareSearchQueryElementFactory; | ||
import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; | ||
import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexValueFieldContext; | ||
import org.hibernate.search.backend.lucene.types.codec.impl.AbstractLuceneNumericFieldCodec; | ||
import org.hibernate.search.engine.search.aggregation.spi.FieldMetricAggregationBuilder; | ||
|
||
public class LuceneMetricNumericLongAggregation extends AbstractLuceneNestableAggregation<Long> { | ||
|
||
private final Set<String> indexNames; | ||
private final String absoluteFieldPath; | ||
private final String operation; | ||
private final CollectorKey<CountDistinctCollector, Long> collectorKey; | ||
|
||
LuceneMetricNumericLongAggregation(Builder builder) { | ||
super( builder ); | ||
this.indexNames = builder.scope.hibernateSearchIndexNames(); | ||
this.absoluteFieldPath = builder.field.absolutePath(); | ||
this.operation = builder.operation; | ||
this.collectorKey = CollectorKey.create(); | ||
} | ||
|
||
@Override | ||
public Extractor<Long> request(AggregationRequestContext context) { | ||
JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromField( | ||
absoluteFieldPath, createNestedDocsProvider( context ) | ||
); | ||
if ( "cardinality".equals( operation ) ) { | ||
context.requireCollector( new CountDistinctCollectorFactory( source, collectorKey ) ); | ||
} | ||
return new LuceneNumericMetricLongAggregationExtraction(); | ||
} | ||
|
||
@Override | ||
public Set<String> indexNames() { | ||
return indexNames; | ||
} | ||
|
||
private class LuceneNumericMetricLongAggregationExtraction implements Extractor<Long> { | ||
@Override | ||
public Long extract(AggregationExtractContext context) { | ||
return context.getFacets( collectorKey ); | ||
} | ||
} | ||
|
||
public static class Factory<F> | ||
extends AbstractLuceneCodecAwareSearchQueryElementFactory<FieldMetricAggregationBuilder<Long>, | ||
F, | ||
AbstractLuceneNumericFieldCodec<F, ?>> { | ||
|
||
private final String operation; | ||
|
||
public Factory(AbstractLuceneNumericFieldCodec<F, ?> codec, String operation) { | ||
super( codec ); | ||
this.operation = operation; | ||
} | ||
|
||
@Override | ||
public FieldMetricAggregationBuilder<Long> create(LuceneSearchIndexScope<?> scope, | ||
LuceneSearchIndexValueFieldContext<F> field) { | ||
return new Builder( scope, field, operation ); | ||
} | ||
} | ||
|
||
private static class Builder extends AbstractBuilder<Long> implements FieldMetricAggregationBuilder<Long> { | ||
private final String operation; | ||
|
||
public Builder(LuceneSearchIndexScope<?> scope, | ||
LuceneSearchIndexValueFieldContext<?> field, | ||
String operation) { | ||
super( scope, field ); | ||
this.operation = operation; | ||
} | ||
|
||
@Override | ||
public LuceneMetricNumericLongAggregation build() { | ||
return new LuceneMetricNumericLongAggregation( this ); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters