diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 8ba40ef88d06..350c1ffa40cf 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -984,6 +984,8 @@ API Changes * GITHUB#13820, GITHUB#13825, GITHUB#13830: Corrects DataInput.readGroupVInts to be public and not-final, removes the protected DataInput.readGroupVInt method. (Zhang Chao, Robert Muir, Uwe Schindler, Dawid Weiss) +* GITHUB#15376, GITHUB#15197: Added prefetching to BKD tree traversal, along with two new PointValues.PointTree APIs: visitDocIDs(long, IntersectVisitor), which visits the doc IDs stored at a given position, and prepareOrVisitDocIDs, which prefetches the IO before visiting the doc IDs. (Saurabh Singh) + New Features --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/index/PointValues.java b/lucene/core/src/java/org/apache/lucene/index/PointValues.java index c77eec0e5ffd..6d0b692a285a 100644 --- a/lucene/core/src/java/org/apache/lucene/index/PointValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/PointValues.java @@ -20,6 +20,8 @@ import java.io.UncheckedIOException; import java.math.BigInteger; import java.net.InetAddress; +import java.util.ArrayList; +import java.util.List; import org.apache.lucene.document.BinaryPoint; import org.apache.lucene.document.DoublePoint; import org.apache.lucene.document.Field; @@ -274,6 +276,17 @@ public interface PointTree extends Cloneable { /** Visit all the docs and values below the current node. 
*/ void visitDocValues(IntersectVisitor visitor) throws IOException; + + /** Visit the docs of the leaf block stored at position {@code pos}. */ + default void visitDocIDs(long pos, IntersectVisitor visitor) throws IOException { + throw new UnsupportedOperationException(); + } + + /** Prefetch the docs below the current node if the visitor supports it, else visit them. */ + default void prepareOrVisitDocIDs(IntersectVisitor visitor) throws IOException { + // Trees without prefetch support must still visit the docs rather than silently skip them. + visitDocIDs(visitor); + } } /** @@ -341,6 +354,43 @@ default void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOExcep default void grow(int count) {} } + /** + * An {@link IntersectVisitor} that lets {@link PointTree} traversal defer visiting leaf blocks. + * During recursion the matching blocks are recorded and prefetched, which should potentially + * trigger the IO for these blocks asynchronously. Once the recursion is complete, all the + * recorded blocks are visited one by one. + * + * @lucene.experimental + */ + public abstract static class PrefetchCapableVisitor implements IntersectVisitor { + + private int lastMatchingOrdinal = -1; + private final List<Long> prefetchedBlocks = new ArrayList<>(); + + /** + * Returns the ordinal of the last matched leaf block, or -1 if none matched yet. This is used + * to skip prefetch calls for contiguous ordinals, assuming read-ahead takes care of them. + */ + public int lastMatchedBlock() { + return lastMatchingOrdinal; + } + + /** Sets the ordinal of the last matched leaf block. */ + public void setLastMatchedBlock(int leafNodeOrdinal) { + lastMatchingOrdinal = leafNodeOrdinal; + } + + /** Records the position of a prefetched leaf block so that it can be visited later on. */ + public void savePrefetchedBlockForLaterVisit(long leafFp) { + prefetchedBlocks.add(leafFp); + } + + /** Returns a copy of the recorded leaf block positions, in the order they were saved. */ + public List<Long> savedPrefetchedBlocks() { + return new ArrayList<>(prefetchedBlocks); + } + } + /** * Finds all documents and points matching the provided visitor. 
This method does not enforce live * documents, so it's up to the caller to test whether each document is deleted, if necessary. @@ -348,6 +398,13 @@ default void grow(int count) {} public final void intersect(IntersectVisitor visitor) throws IOException { final PointTree pointTree = getPointTree(); intersect(visitor, pointTree); + if (visitor instanceof PrefetchCapableVisitor prefetchCapableVisitor) { + // Leaf blocks that were only recorded during the traversal above are visited now. + List<Long> leafBlockFps = prefetchCapableVisitor.savedPrefetchedBlocks(); + for (long leafBlockFp : leafBlockFps) { + pointTree.visitDocIDs(leafBlockFp, visitor); + } + } assert pointTree.moveToParent() == false; } @@ -358,7 +415,8 @@ private static void intersect(IntersectVisitor visitor, PointTree pointTree) thr if (compare == Relation.CELL_INSIDE_QUERY) { // This cell is fully inside the query shape: recursively add all points in this cell // without filtering - pointTree.visitDocIDs(visitor); + // May defer the visit: a prefetch-capable visitor gets the blocks recorded for later. + pointTree.prepareOrVisitDocIDs(visitor); } else if (compare == Relation.CELL_CROSSES_QUERY) { // The cell crosses the shape boundary, or the cell fully contains the query, so we fall // through and do full filtering: diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java index c198fecb4b35..f9adc6e2cea4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java @@ -147,7 +147,7 @@ private boolean matches(byte[] packedValue) { } private IntersectVisitor getIntersectVisitor(DocIdSetBuilder result) { - return new IntersectVisitor() { + return new PointValues.PrefetchCapableVisitor() { DocIdSetBuilder.BulkAdder adder; @@ -194,7 +194,7 @@ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { /** Create a visitor that sets documents that do NOT match the range. 
*/ private IntersectVisitor getInverseIntersectVisitor(FixedBitSet result, long[] cost) { - return new IntersectVisitor() { + return new PointValues.PrefetchCapableVisitor() { @Override public void visit(int docID) { diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java index 9c991e6b1b4a..7b4b30075018 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java @@ -589,6 +589,75 @@ public void visitDocIDs(PointValues.IntersectVisitor visitor) throws IOException addAll(visitor, false); } + @Override + public void prepareOrVisitDocIDs(IntersectVisitor visitor) throws IOException { + resetNodeDataPosition(); + prefetchAll(visitor, false); + } + + @Override + public void visitDocIDs(long position, IntersectVisitor visitor) throws IOException { + visitDocIDs(position, visitor, false); + } + + /** Visits the doc IDs of the leaf block at {@code position}; grows first if not grown. */ + private void visitDocIDs(long position, IntersectVisitor visitor, boolean grown) + throws IOException { + leafNodes.seek(position); + // How many points are stored in this leaf cell: + final int count = leafNodes.readVInt(); + if (grown == false) { + visitor.grow(count); + } + docIdsWriter.readInts(leafNodes, count, visitor, scratchIterator.docIDs); + } + + /** Returns the ordinal of the current leaf node, i.e. {@code nodeID - leafNodeOffset}. */ + private int getLeafNodeOrdinal() { + assert isLeafNode() : "nodeID=" + nodeID + " is not a leaf"; + return nodeID - leafNodeOffset; + } + 
+ /** + * Visits every leaf below the current node. For a {@link PrefetchCapableVisitor} each leaf + * block is recorded for a later visit and prefetched when it starts a new run of matching + * leaves; for any other visitor the doc IDs are visited immediately. + */ + public void prefetchAll(IntersectVisitor visitor, boolean grown) throws IOException { + // Deferred blocks are grown per leaf when they are finally read, so growing here as well + // would reserve room for the same docs twice. + if (grown == false && (visitor instanceof PrefetchCapableVisitor) == false) { + final long size = size(); + if (size <= Integer.MAX_VALUE) { + visitor.grow((int) size); + grown = true; + } + } + if (isLeafNode()) { + final long leafFp = getLeafBlockFP(); + if (visitor instanceof PrefetchCapableVisitor prefetchCapableVisitor) { + final int leafNodeOrdinal = getLeafNodeOrdinal(); + final int lastMatched = prefetchCapableVisitor.lastMatchedBlock(); + // Only prefetch the first leaf of a contiguous run of matching leaves; read-ahead is + // expected to cover the rest of the run. + if (lastMatched == -1 || lastMatched + 1 < leafNodeOrdinal) { + leafNodes.prefetch(leafFp, 1); + } + prefetchCapableVisitor.setLastMatchedBlock(leafNodeOrdinal); + prefetchCapableVisitor.savePrefetchedBlockForLaterVisit(leafFp); + } else { + visitDocIDs(leafFp, visitor, grown); + } + } else { + pushLeft(); + prefetchAll(visitor, grown); + pop(); + pushRight(); + prefetchAll(visitor, grown); + pop(); + } + } + public void addAll(PointValues.IntersectVisitor visitor, boolean grown) throws IOException { if (grown == false) { final long size = size(); diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/AssertingLeafReader.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/AssertingLeafReader.java index 1958cd4b9588..4da41cbb782d 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/AssertingLeafReader.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/AssertingLeafReader.java @@ -17,8 +17,10 @@ package org.apache.lucene.tests.index; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; +import java.util.List; import java.util.Objects; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.DocValues; @@ -1597,13 +1599,23 @@ public void visitDocValues(IntersectVisitor visitor) throws IOException { pointValues.getBytesPerDimension(), visitor)); } + + @Override + public void 
visitDocIDs(long pos, IntersectVisitor visitor) throws IOException { + in.visitDocIDs(pos, visitor); + } + + @Override + public void prepareOrVisitDocIDs(IntersectVisitor visitor) throws IOException { + in.prepareOrVisitDocIDs(visitor); + } } /** * Validates in the 1D case that all points are visited in order, and point values are in bounds * of the last cell checked */ - static class AssertingIntersectVisitor implements IntersectVisitor { + static class AssertingIntersectVisitor extends PointValues.PrefetchCapableVisitor { final IntersectVisitor in; final int numDataDims; final int numIndexDims; @@ -1614,6 +1626,8 @@ static class AssertingIntersectVisitor implements IntersectVisitor { private Relation lastCompareResult; private int lastDocID = -1; private int docBudget; + int lastMatchedBlock = -1; // -1: no leaf block has matched yet + private final List<Long> prefetchedBlocks = new ArrayList<>(); AssertingIntersectVisitor( int numDataDims, int numIndexDims, int bytesPerDim, IntersectVisitor in) { @@ -1716,6 +1730,26 @@ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { lastCompareResult = in.compare(minPackedValue, maxPackedValue); return lastCompareResult; } + + @Override + public int lastMatchedBlock() { + return lastMatchedBlock; + } + + @Override + public void setLastMatchedBlock(int leafNodeOrdinal) { + lastMatchedBlock = leafNodeOrdinal; + } + + @Override + public void savePrefetchedBlockForLaterVisit(long leafFp) { + prefetchedBlocks.add(leafFp); + } + + @Override + public List<Long> savedPrefetchedBlocks() { + return new ArrayList<>(prefetchedBlocks); + } } @Override