From ed619e96361c43c815f755a97912a5531910226b Mon Sep 17 00:00:00 2001 From: Sarthak Aggarwal Date: Fri, 4 Oct 2024 15:47:00 +0530 Subject: [PATCH] supporting range over star-tree file formats Signed-off-by: Sarthak Aggarwal --- .../node/FixedLengthStarTreeNode.java | 165 ++++++++++++++---- .../node/FixedLengthStarTreeNodeTests.java | 72 +++++++- 2 files changed, 201 insertions(+), 36 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNode.java index df2ce9096bfc1..4d0dcab60f0c0 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNode.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNode.java @@ -7,6 +7,8 @@ */ package org.opensearch.index.compositeindex.datacube.startree.fileformats.node; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.lucene.store.RandomAccessInput; import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNode; import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType; @@ -14,6 +16,7 @@ import java.io.IOException; import java.io.UncheckedIOException; import java.util.Iterator; +import java.util.NoSuchElementException; /** * Fixed Length implementation of {@link StarTreeNode}. 
@@ -36,6 +39,8 @@ */ public class FixedLengthStarTreeNode implements StarTreeNode { + private static final Logger logger = LogManager.getLogger(FixedLengthStarTreeNode.class); + /** * Number of integer fields in the serializable data */ @@ -200,7 +205,10 @@ public StarTreeNode getChildForDimensionValue(Long dimensionValue) throws IOExce StarTreeNode resultStarTreeNode = null; if (null != dimensionValue) { - resultStarTreeNode = binarySearchChild(dimensionValue); + int resultStarTreeNodeId = binarySearchForDimension(dimensionValue, false); + if (resultStarTreeNodeId != INVALID_ID) { + resultStarTreeNode = new FixedLengthStarTreeNode(in, resultStarTreeNodeId); + } } return resultStarTreeNode; } @@ -219,7 +227,7 @@ private FixedLengthStarTreeNode handleStarNode() throws IOException { /** * Checks if the given node matches the specified StarTreeNodeType. * - * @param firstNode The FixedLengthStarTreeNode to check. + * @param firstNode The FixedLengthStarTreeNode to check. * @param starTreeNodeType The StarTreeNodeType to match against. * @return The firstNode if its type matches the targetType, null otherwise. * @throws IOException If an I/O error occurs during the operation. @@ -233,68 +241,155 @@ private static FixedLengthStarTreeNode matchStarTreeNodeTypeOrNull(FixedLengthSt } } + @Override + public Iterator getChildrenIterator() throws IOException { + return new Iterator<>() { + private int currentChildId = firstChildId; + private final int lastChildId = getInt(LAST_CHILD_ID_OFFSET); + + @Override + public boolean hasNext() { + return currentChildId <= lastChildId; + } + + @Override + public FixedLengthStarTreeNode next() { + try { + return new FixedLengthStarTreeNode(in, currentChildId++); + } catch (IOException | RuntimeException e) { + throw new IllegalStateException(e); + } + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + /** - * Performs a binary search to find a child node with the given dimension value. 
+ * Finds and returns all children with dimension values between the given start and end values. * - * @param dimensionValue The dimension value to search for - * @return The child node if found, null otherwise + * @param startDimensionValue The start of the range (inclusive) + * @param endDimensionValue The end of the range (inclusive) + * @return An iterator over the child nodes whose dimension values lie within the specified range * @throws IOException If there's an error reading from the input */ - private FixedLengthStarTreeNode binarySearchChild(long dimensionValue) throws IOException { + public Iterator range(long startDimensionValue, long endDimensionValue) throws IOException { + return new Iterator<>() { + int currentChildId = binarySearchForDimension(startDimensionValue, true); + final int lastChildId = getInt(LAST_CHILD_ID_OFFSET); + FixedLengthStarTreeNode nextNode = null; - int low = firstChildId; + @Override + public boolean hasNext() { + try { + // Continue iterating while we have valid children + while (currentChildId != INVALID_ID && currentChildId <= lastChildId) { + FixedLengthStarTreeNode currentNode = new FixedLengthStarTreeNode(in, currentChildId); + long currentDimensionValue = currentNode.getDimensionValue(); + + // If node exceeds endDimensionValue, we are out of the range + if (currentDimensionValue > endDimensionValue) { + return false; + } + + // If node is within the range, we prepare it as the next node + if (currentDimensionValue >= startDimensionValue) { + nextNode = currentNode; + currentChildId++; // Move to the next child for future iterations + return true; + } + + // Otherwise, move to the next child + currentChildId++; + } + } catch (IOException e) { + throw new UncheckedIOException(e); + } + return false; // No more children in range + } + + @Override + public FixedLengthStarTreeNode next() { + if (nextNode == null && !hasNext()) { + throw new NoSuchElementException(); + } + + FixedLengthStarTreeNode returnNode = nextNode; + nextNode = 
null; // Reset nextNode for the next call + return returnNode; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + + /** + * Performs a binary search to find a node with a dimension value. + * + * @param dimensionValue The dimension value to search for + * @param findFirstInRange If true, find the first node >= dimensionValue; otherwise, find exact matches + * @return The node ID if found, INVALID_ID otherwise + * @throws IOException If there's an error reading from the input + */ + private int binarySearchForDimension(long dimensionValue, boolean findFirstInRange) throws IOException { + int low = firstChildId; // if the current node is star node, increment the low to reduce the search space - if (matchStarTreeNodeTypeOrNull(new FixedLengthStarTreeNode(in, firstChildId), StarTreeNodeType.STAR) != null) { + if (isStarNode(low)) { low++; } int high = getInt(LAST_CHILD_ID_OFFSET); // if the current node is null node, decrement the high to reduce the search space - if (matchStarTreeNodeTypeOrNull(new FixedLengthStarTreeNode(in, high), StarTreeNodeType.NULL) != null) { + if (isNullNode(high)) { high--; } + int resultId = INVALID_ID; while (low <= high) { int mid = low + (high - low) / 2; FixedLengthStarTreeNode midNode = new FixedLengthStarTreeNode(in, mid); long midDimensionValue = midNode.getDimensionValue(); if (midDimensionValue == dimensionValue) { - return midNode; + return mid; // Exact match found } else if (midDimensionValue < dimensionValue) { low = mid + 1; } else { + resultId = mid; // Possible candidate for start of range high = mid - 1; } } - return null; - } - @Override - public Iterator getChildrenIterator() throws IOException { - return new Iterator<>() { - private int currentChildId = firstChildId; - private final int lastChildId = getInt(LAST_CHILD_ID_OFFSET); - - @Override - public boolean hasNext() { - return currentChildId <= lastChildId; - } + return findFirstInRange ? 
resultId : INVALID_ID; + } - @Override - public FixedLengthStarTreeNode next() { - try { - return new FixedLengthStarTreeNode(in, currentChildId++); - } catch (IOException e) { - throw new UncheckedIOException(e); - } - } + /** + * Checks if the node at the given ID is a star node. + * + * @param nodeId The ID of the node to check + * @return true if it's a star node, false otherwise + * @throws IOException If there's an error reading from the input + */ + private boolean isStarNode(int nodeId) throws IOException { + return matchStarTreeNodeTypeOrNull(new FixedLengthStarTreeNode(in, nodeId), StarTreeNodeType.STAR) != null; + } - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - }; + /** + * Checks if the node at the given ID is a null node. + * + * @param nodeId The ID of the node to check + * @return true if it's a null node, false otherwise + * @throws IOException If there's an error reading from the input + */ + private boolean isNullNode(int nodeId) throws IOException { + return matchStarTreeNodeTypeOrNull(new FixedLengthStarTreeNode(in, nodeId), StarTreeNodeType.NULL) != null; } + } diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNodeTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNodeTests.java index 08815d5ef55f5..fc446492355e5 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNodeTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNodeTests.java @@ -220,13 +220,83 @@ public void testOnlyRootNodePresent() throws IOException { assertEquals(starTreeNode.getNumChildren(), 0); assertNull(starTreeNode.getChildForDimensionValue(randomLong())); - assertThrows(IllegalArgumentException.class, () -> 
starTreeNode.getChildrenIterator().next()); + assertThrows(IllegalStateException.class, () -> starTreeNode.getChildrenIterator().next()); assertThrows(UnsupportedOperationException.class, () -> starTreeNode.getChildrenIterator().remove()); dataIn.close(); directory.close(); } + public void testRangeWithinBounds() throws IOException { + // Assume the starTreeNode is set up with valid nodes + long startDimensionValue = 0; // The first child node's value + long endDimensionValue = node.getChildren().get((long) (node.getChildren().size() - 3)).getDimensionValue(); // The last child + // node's value + + Iterator rangeIterator = starTreeNode.range(startDimensionValue, endDimensionValue); + + int count = 0; + while (rangeIterator.hasNext()) { + FixedLengthStarTreeNode currentNode = rangeIterator.next(); + assertNotNull(currentNode); + assertTrue(currentNode.getDimensionValue() >= startDimensionValue); + assertTrue(currentNode.getDimensionValue() <= endDimensionValue); + count++; + } + + assertEquals(node.getChildren().size() - 2, count); // All but two of the children are expected in the range + } + + public void testRangeOutsideBounds() throws IOException { + long startDimensionValue = node.getDimensionValue() + 1000; // Use a value larger than any child node's dimension value + long endDimensionValue = startDimensionValue + 100; // A range that doesn't exist + + Iterator rangeIterator = starTreeNode.range(startDimensionValue, endDimensionValue); + + // Expect no elements in this case + assertFalse(rangeIterator.hasNext()); + } + + public void testEmptyRange() throws IOException { + long startDimensionValue = -2; + long endDimensionValue = -2; + + Iterator rangeIterator = starTreeNode.range(startDimensionValue, endDimensionValue); + + // No elements expected as start equals end and there are no nodes with value -2 + assertFalse(rangeIterator.hasNext()); + } + + public void testRangeWithOnlySomeNodes() throws IOException { + // Assuming there are multiple nodes, test with a subset 
of the range + long startDimensionValue = 0; // Start at second child + long endDimensionValue = node.getChildren().get((long) node.getChildren().size() - 3).getDimensionValue(); // End before last child + + Iterator rangeIterator = starTreeNode.range(startDimensionValue, endDimensionValue); + + int count = 0; + while (rangeIterator.hasNext()) { + FixedLengthStarTreeNode currentNode = rangeIterator.next(); + assertNotNull(currentNode); + assertTrue(currentNode.getDimensionValue() >= startDimensionValue); + assertTrue(currentNode.getDimensionValue() <= endDimensionValue); + count++; + } + + // Check that the correct number of nodes in the range were returned + assertEquals(node.getChildren().size() - 2, count); + } + + public void testInvalidRange() throws IOException { + long startDimensionValue = 10; + long endDimensionValue = 5; // Invalid because start > end + + Iterator rangeIterator = starTreeNode.range(startDimensionValue, endDimensionValue); + + // No elements expected in this case + assertFalse(rangeIterator.hasNext()); + } + public void tearDown() throws Exception { super.tearDown(); dataIn.close();