Skip to content

Commit

Permalink
Use DenseConjunctionBulkScorer for single queries sometimes.
Browse files Browse the repository at this point in the history
Even though there is a single clause, it often needs to be intersected, either
with live docs or with the collector's competitive iterator.

This uses `DenseConjunctionBulkScorer` for:
 - `MatchAllDocsQuery`,
 - `TermQuery` when scores are not needed and `docFreq >= maxDoc / 32`,
 - some constant-score queries including `PointRangeQuery` when
   `cost >= maxDoc / 32`.

In addition,
 - `DenseConjunctionBulkScorer` was improved to stop collecting hits when the
   min competitive score is greater than the configured constant score,
 - test coverage was added to sorting tests to make sure that their competitive
   iterators are happy with how `DenseConjunctionBulkScorer` reads ahead.

The downside of this change is that it forces the impacted queries to score at
least 4096 (the window size) docs at once. So queries that can start skipping
very early (and which are thus very fast) may see a slowdown (e.g.
`TermMonthSort`). On the other hand, when it takes time before dynamic pruning
becomes effective, there could be a speedup thanks to the more efficient
intersection logic (e.g. `TermDTSort`).

This change should also help in the presence of deleted docs, by taking
advantage of the more efficient way in which this bulk scorer applies deleted
docs.

Closes #14283
  • Loading branch information
jpountz committed Feb 25, 2025
1 parent 27cf1e1 commit 5c71482
Show file tree
Hide file tree
Showing 19 changed files with 885 additions and 480 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreScorerSupplier;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.DocValuesRangeIterator;
Expand Down Expand Up @@ -116,18 +116,17 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti
return null;
}

int maxDoc = context.reader().maxDoc();
DocValuesSkipper skipper = context.reader().getDocValuesSkipper(field);
if (skipper != null) {
if (skipper.minValue() > upperValue || skipper.maxValue() < lowerValue) {
return null;
}
if (skipper.docCount() == context.reader().maxDoc()
if (skipper.docCount() == maxDoc
&& skipper.minValue() >= lowerValue
&& skipper.maxValue() <= upperValue) {
final var scorer =
new ConstantScoreScorer(
score(), scoreMode, DocIdSetIterator.all(skipper.docCount()));
return new DefaultScorerSupplier(scorer);
return ConstantScoreScorerSupplier.fromIterator(
DocIdSetIterator.all(maxDoc), score(), scoreMode, maxDoc);
}
}

Expand All @@ -139,8 +138,8 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti
final DocIdSetIterator psIterator =
getDocIdSetIteratorOrNullForPrimarySort(context.reader(), singleton, skipper);
if (psIterator != null) {
return new DefaultScorerSupplier(
new ConstantScoreScorer(score(), scoreMode, psIterator));
return ConstantScoreScorerSupplier.fromIterator(
psIterator, score(), scoreMode, maxDoc);
}
}
iterator =
Expand Down Expand Up @@ -182,8 +181,8 @@ public float matchCost() {
if (skipper != null) {
iterator = new DocValuesRangeIterator(iterator, skipper, lowerValue, upperValue, false);
}
final var scorer = new ConstantScoreScorer(score(), scoreMode, iterator);
return new DefaultScorerSupplier(scorer);
return ConstantScoreScorerSupplier.fromIterator(
TwoPhaseIterator.asDocIdSetIterator(iterator), score(), scoreMode, maxDoc);
}
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreScorerSupplier;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.DocValuesRangeIterator;
Expand All @@ -34,7 +34,6 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TwoPhaseIterator;
Expand Down Expand Up @@ -122,9 +121,9 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti
SortedSetDocValues values = DocValues.getSortedSet(context.reader(), field);

// implement ScorerSupplier, since we do some expensive stuff to make a scorer
return new ScorerSupplier() {
return new ConstantScoreScorerSupplier(score(), scoreMode, context.reader().maxDoc()) {
@Override
public Scorer get(long leadCost) throws IOException {
public DocIdSetIterator iterator(long leadCost) throws IOException {

final long minOrd;
if (lowerValue == null) {
Expand Down Expand Up @@ -158,16 +157,15 @@ public Scorer get(long leadCost) throws IOException {
if (minOrd > maxOrd
|| (skipper != null
&& (minOrd > skipper.maxValue() || maxOrd < skipper.minValue()))) {
return new ConstantScoreScorer(score(), scoreMode, DocIdSetIterator.empty());
return DocIdSetIterator.empty();
}

// all terms matched in this segment
if (skipper != null
&& skipper.docCount() == context.reader().maxDoc()
&& skipper.minValue() >= minOrd
&& skipper.maxValue() <= maxOrd) {
return new ConstantScoreScorer(
score(), scoreMode, DocIdSetIterator.all(skipper.docCount()));
return DocIdSetIterator.all(skipper.docCount());
}

final SortedDocValues singleton = DocValues.unwrapSingleton(values);
Expand All @@ -178,7 +176,7 @@ public Scorer get(long leadCost) throws IOException {
getDocIdSetIteratorOrNullForPrimarySort(
context.reader(), singleton, skipper, minOrd, maxOrd);
if (psIterator != null) {
return new ConstantScoreScorer(score(), scoreMode, psIterator);
return psIterator;
}
}
iterator =
Expand Down Expand Up @@ -220,7 +218,7 @@ public float matchCost() {
if (skipper != null) {
iterator = new DocValuesRangeIterator(iterator, skipper, minOrd, maxOrd, false);
}
return new ConstantScoreScorer(score(), scoreMode, iterator);
return TwoPhaseIterator.asDocIdSetIterator(iterator);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,8 @@ BulkScorer filteredOptionalBulkScorer() throws IOException {
if (filters.stream().map(Scorer::twoPhaseIterator).allMatch(Objects::isNull)
&& maxDoc >= DenseConjunctionBulkScorer.WINDOW_SIZE
&& cost >= maxDoc / DenseConjunctionBulkScorer.DENSITY_THRESHOLD_INVERSE) {
return new DenseConjunctionBulkScorer(filters.stream().map(Scorer::iterator).toList());
return new DenseConjunctionBulkScorer(
filters.stream().map(Scorer::iterator).toList(), maxDoc, 0f);
}

return new DefaultBulkScorer(new ConjunctionScorer(filters, Collections.emptyList()));
Expand Down Expand Up @@ -397,7 +398,7 @@ private BulkScorer requiredBulkScorer() throws IOException {
&& maxDoc >= DenseConjunctionBulkScorer.WINDOW_SIZE
&& leadCost >= maxDoc / DenseConjunctionBulkScorer.DENSITY_THRESHOLD_INVERSE) {
return new DenseConjunctionBulkScorer(
requiredNoScoring.stream().map(Scorer::iterator).toList());
requiredNoScoring.stream().map(Scorer::iterator).toList(), maxDoc, 0f);
} else {
return new ConjunctionBulkScorer(requiredScoring, requiredNoScoring);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;

import java.io.IOException;
import java.util.Collections;
import org.apache.lucene.search.Weight.DefaultBulkScorer;

/**
 * Specialization of {@link ScorerSupplier} for queries that produce constant scores.
 *
 * <p>Sub-classes only describe the matching docs through {@link #iterator(long)}; this class wraps
 * that iterator into a {@link Scorer} ({@link #get(long)}) or a {@link BulkScorer} ({@link
 * #bulkScorer()}).
 */
public abstract class ConstantScoreScorerSupplier extends ScorerSupplier {

  /**
   * Create a {@link ConstantScoreScorerSupplier} backed by an already-created iterator.
   *
   * @param iterator iterator over the matching docs; this same instance is returned by every call
   *     to {@link #iterator(long)}, and its {@link DocIdSetIterator#cost() cost} is reported as
   *     the cost of the supplier
   * @param score the constant score given to the created scorers
   * @param scoreMode the score mode passed to the created {@link ConstantScoreScorer}
   * @param maxDoc upper bound used by {@link #bulkScorer()} to pick a bulk scorer; callers in this
   *     change pass the segment reader's {@code maxDoc()}
   */
  public static ConstantScoreScorerSupplier fromIterator(
      DocIdSetIterator iterator, float score, ScoreMode scoreMode, int maxDoc) {
    return new ConstantScoreScorerSupplier(score, scoreMode, maxDoc) {

      @Override
      public long cost() {
        return iterator.cost();
      }

      @Override
      public DocIdSetIterator iterator(long leadCost) throws IOException {
        return iterator;
      }
    };
  }

  private final ScoreMode scoreMode;
  private final float score;
  private final int maxDoc;

  /**
   * Constructor, invoked by sub-classes.
   *
   * @param score the constant score given to the created scorers
   * @param scoreMode the score mode passed to the created {@link ConstantScoreScorer}
   * @param maxDoc upper bound used by {@link #bulkScorer()} to pick a bulk scorer
   */
  protected ConstantScoreScorerSupplier(float score, ScoreMode scoreMode, int maxDoc) {
    this.scoreMode = scoreMode;
    this.score = score;
    this.maxDoc = maxDoc;
  }

  /** Return an iterator given the cost of the leading clause. */
  public abstract DocIdSetIterator iterator(long leadCost) throws IOException;

  /**
   * Create a constant-score {@link Scorer} over {@link #iterator(long)}.
   *
   * <p>When the iterator wraps a {@link TwoPhaseIterator} (as reported by {@link
   * TwoPhaseIterator#unwrap}), the two-phase iterator is handed to the scorer instead of the plain
   * iterator.
   */
  @Override
  public final Scorer get(long leadCost) throws IOException {
    DocIdSetIterator iterator = iterator(leadCost);
    return newScorer(iterator, TwoPhaseIterator.unwrap(iterator));
  }

  /**
   * Create a {@link BulkScorer} over {@code iterator(Long.MAX_VALUE)}.
   *
   * <p>A {@link DenseConjunctionBulkScorer} with this single clause is used when all of the
   * following hold: {@code maxDoc} is at least half of {@link
   * DenseConjunctionBulkScorer#WINDOW_SIZE}, the iterator cost is at least {@code maxDoc /
   * DENSITY_THRESHOLD_INVERSE}, and the iterator does not wrap a {@link TwoPhaseIterator}.
   * Otherwise a {@link DefaultBulkScorer} over a {@link ConstantScoreScorer} is returned.
   */
  @Override
  public final BulkScorer bulkScorer() throws IOException {
    DocIdSetIterator iterator = iterator(Long.MAX_VALUE);
    TwoPhaseIterator twoPhase = TwoPhaseIterator.unwrap(iterator);
    if (maxDoc >= DenseConjunctionBulkScorer.WINDOW_SIZE / 2
        && iterator.cost() >= maxDoc / DenseConjunctionBulkScorer.DENSITY_THRESHOLD_INVERSE
        && twoPhase == null) {
      return new DenseConjunctionBulkScorer(Collections.singletonList(iterator), maxDoc, score);
    }
    // Keep the two-phase view when there is one, exactly like get() does, instead of hiding it
    // behind its DocIdSetIterator facade.
    return new DefaultBulkScorer(newScorer(iterator, twoPhase));
  }

  /**
   * Build the constant-score scorer shared by {@link #get(long)} and {@link #bulkScorer()}.
   *
   * @param iterator the iterator over matching docs
   * @param twoPhase the two-phase iterator wrapped by {@code iterator}, or {@code null} if none
   */
  private Scorer newScorer(DocIdSetIterator iterator, TwoPhaseIterator twoPhase) {
    if (twoPhase == null) {
      return new ConstantScoreScorer(score, scoreMode, iterator);
    }
    return new ConstantScoreScorer(score, scoreMode, twoPhase);
  }
}
Loading

0 comments on commit 5c71482

Please sign in to comment.