Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Supporting Range over Star-Tree File Formats #16189

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@
*/
package org.opensearch.index.compositeindex.datacube.startree.fileformats.node;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.store.RandomAccessInput;
import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNode;
import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Iterator;
import java.util.NoSuchElementException;

/**
* Fixed Length implementation of {@link StarTreeNode}.
Expand All @@ -36,6 +39,8 @@
*/
public class FixedLengthStarTreeNode implements StarTreeNode {

private static final Logger logger = LogManager.getLogger(FixedLengthStarTreeNode.class);

/**
* Number of integer fields in the serializable data
*/
Expand Down Expand Up @@ -200,7 +205,10 @@ public StarTreeNode getChildForDimensionValue(Long dimensionValue) throws IOExce

StarTreeNode resultStarTreeNode = null;
if (null != dimensionValue) {
resultStarTreeNode = binarySearchChild(dimensionValue);
int resultStarTreeNodeId = binarySearchForDimension(dimensionValue, false);
if (resultStarTreeNodeId != INVALID_ID) {
resultStarTreeNode = new FixedLengthStarTreeNode(in, resultStarTreeNodeId);
}
}
return resultStarTreeNode;
}
Expand All @@ -219,7 +227,7 @@ private FixedLengthStarTreeNode handleStarNode() throws IOException {
/**
* Checks if the given node matches the specified StarTreeNodeType.
*
* @param firstNode The FixedLengthStarTreeNode to check.
* @param firstNode The FixedLengthStarTreeNode to check.
* @param starTreeNodeType The StarTreeNodeType to match against.
* @return The firstNode if its type matches the targetType, null otherwise.
* @throws IOException If an I/O error occurs during the operation.
Expand All @@ -233,68 +241,155 @@ private static FixedLengthStarTreeNode matchStarTreeNodeTypeOrNull(FixedLengthSt
}
}

/**
 * Returns an iterator that walks the child ids from {@code firstChildId} up to and including the id
 * read from {@code LAST_CHILD_ID_OFFSET}, materializing one {@link FixedLengthStarTreeNode} per call
 * to {@code next()}.
 *
 * @return an iterator over the children of this node; {@code remove()} is not supported
 * @throws IOException if the last child id cannot be read from the input
 */
@Override
public Iterator<FixedLengthStarTreeNode> getChildrenIterator() throws IOException {
    return new Iterator<>() {
        private int cursor = firstChildId;
        private final int finalChildId = getInt(LAST_CHILD_ID_OFFSET);

        @Override
        public boolean hasNext() {
            return finalChildId >= cursor;
        }

        @Override
        public FixedLengthStarTreeNode next() {
            // The cursor is advanced before the node is built, so it moves on even when construction fails.
            final int childId = cursor;
            cursor = childId + 1;
            try {
                return new FixedLengthStarTreeNode(in, childId);
            } catch (IOException | RuntimeException e) {
                // Any failure while building the child is surfaced as an IllegalStateException.
                throw new IllegalStateException(e);
            }
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    };
}

/**
* Performs a binary search to find a child node with the given dimension value.
* Finds and returns all children with dimension values between the given start and end values.
*
* @param dimensionValue The dimension value to search for
* @return The child node if found, null otherwise
* @param startDimensionValue The start of the range (inclusive)
* @param endDimensionValue The end of the range (inclusive)
* @return An iterator over the child nodes whose dimension values lie within the specified range
* @throws IOException If there's an error reading from the input
*/
private FixedLengthStarTreeNode binarySearchChild(long dimensionValue) throws IOException {
public Iterator<FixedLengthStarTreeNode> range(long startDimensionValue, long endDimensionValue) throws IOException {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As discussed offline, we need to support a list of ranges, with the base case being just one range. Let's see if we can have a Range class similar to the one in RangeAggregator.

return new Iterator<>() {
    // Id of the next child to examine. Starts at the id returned by the binary search for
    // startDimensionValue, which is INVALID_ID when no child qualifies.
    private int currentChildId = binarySearchForDimension(startDimensionValue, true);
    private final int lastChildId = getInt(LAST_CHILD_ID_OFFSET);
    // Node located by hasNext() that has not been handed out by next() yet; null when nothing is buffered.
    private FixedLengthStarTreeNode nextNode = null;

    @Override
    public boolean hasNext() {
        // A node found by an earlier call is still pending. Answer from the buffer instead of
        // advancing again, otherwise repeated hasNext() calls would silently skip children.
        if (nextNode != null) {
            return true;
        }
        try {
            // Continue iterating while we have valid children
            while (currentChildId != INVALID_ID && currentChildId <= lastChildId) {
                FixedLengthStarTreeNode currentNode = new FixedLengthStarTreeNode(in, currentChildId);
                long currentDimensionValue = currentNode.getDimensionValue();

                // If node exceeds endDimensionValue, we are out of the range
                if (currentDimensionValue > endDimensionValue) {
                    return false;
                }

                // If node is within the range, buffer it as the next node
                if (currentDimensionValue >= startDimensionValue) {
                    nextNode = currentNode;
                    currentChildId++; // Move to the next child for future iterations
                    return true;
                }

                // Otherwise, move to the next child
                currentChildId++;
            }
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
        return false; // No more children in range
    }

    @Override
    public FixedLengthStarTreeNode next() {
        // hasNext() fills the buffer when it is empty; if it cannot, the range is exhausted.
        if (nextNode == null && !hasNext()) {
            throw new NoSuchElementException();
        }

        FixedLengthStarTreeNode returnNode = nextNode;
        nextNode = null; // Reset nextNode for the next call
        return returnNode;
    }

    @Override
    public void remove() {
        throw new UnsupportedOperationException();
    }
};
}

/**
* Performs a binary search over this node's children to find a node with a dimension value.
* A star node in the first child position and a null node in the last child position are left out of the search.
*
* @param dimensionValue The dimension value to search for
* @param findFirstInRange If true, find the first node >= dimensionValue; otherwise, find exact matches
* @return The ID of the node whose dimension value equals dimensionValue if there is one. Otherwise INVALID_ID when
* findFirstInRange is false, or, when it is true, the lowest-ID probed node with a greater dimension value
* (INVALID_ID if no such node was seen). NOTE(review): this relies on children being ordered by dimension
* value - confirm against the writer.
* @throws IOException If there's an error reading from the input
*/
private int binarySearchForDimension(long dimensionValue, boolean findFirstInRange) throws IOException {

int low = firstChildId;
// if the first child is a star node, increment the low to reduce the search space
if (matchStarTreeNodeTypeOrNull(new FixedLengthStarTreeNode(in, firstChildId), StarTreeNodeType.STAR) != null) {
if (isStarNode(low)) {
low++;
}

int high = getInt(LAST_CHILD_ID_OFFSET);
// if the last child is a null node, decrement the high to reduce the search space
if (matchStarTreeNodeTypeOrNull(new FixedLengthStarTreeNode(in, high), StarTreeNodeType.NULL) != null) {
if (isNullNode(high)) {
high--;
}

// most recently probed child whose value is greater than dimensionValue; stays INVALID_ID until one is seen
int resultId = INVALID_ID;
while (low <= high) {
// midpoint of the remaining [low, high] window
int mid = low + (high - low) / 2;
FixedLengthStarTreeNode midNode = new FixedLengthStarTreeNode(in, mid);
long midDimensionValue = midNode.getDimensionValue();

if (midDimensionValue == dimensionValue) {
return midNode;
return mid; // Exact match found
} else if (midDimensionValue < dimensionValue) {
low = mid + 1;
} else {
resultId = mid; // Possible candidate for start of range
high = mid - 1;
}
}
return null;
}

@Override
public Iterator<FixedLengthStarTreeNode> getChildrenIterator() throws IOException {
return new Iterator<>() {
private int currentChildId = firstChildId;
private final int lastChildId = getInt(LAST_CHILD_ID_OFFSET);

@Override
public boolean hasNext() {
return currentChildId <= lastChildId;
}
// no exact match: a range lookup falls back to the recorded candidate, an exact lookup reports INVALID_ID
return findFirstInRange ? resultId : INVALID_ID;
}

@Override
public FixedLengthStarTreeNode next() {
try {
return new FixedLengthStarTreeNode(in, currentChildId++);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
/**
 * Tells whether the child stored under the given ID has type {@link StarTreeNodeType#STAR}.
 *
 * @param nodeId the ID of the node to inspect
 * @return {@code true} when the node's type is STAR, {@code false} otherwise
 * @throws IOException if the node cannot be read from the input
 */
private boolean isStarNode(int nodeId) throws IOException {
    FixedLengthStarTreeNode candidate = new FixedLengthStarTreeNode(in, nodeId);
    FixedLengthStarTreeNode matched = matchStarTreeNodeTypeOrNull(candidate, StarTreeNodeType.STAR);
    return matched != null;
}

@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
/**
 * Tells whether the child stored under the given ID has type {@link StarTreeNodeType#NULL}.
 *
 * @param nodeId the ID of the node to inspect
 * @return {@code true} when the node's type is NULL, {@code false} otherwise
 * @throws IOException if the node cannot be read from the input
 */
private boolean isNullNode(int nodeId) throws IOException {
    FixedLengthStarTreeNode candidate = new FixedLengthStarTreeNode(in, nodeId);
    FixedLengthStarTreeNode matched = matchStarTreeNodeTypeOrNull(candidate, StarTreeNodeType.NULL);
    return matched != null;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -220,13 +220,83 @@ public void testOnlyRootNodePresent() throws IOException {

assertEquals(starTreeNode.getNumChildren(), 0);
assertNull(starTreeNode.getChildForDimensionValue(randomLong()));
assertThrows(IllegalArgumentException.class, () -> starTreeNode.getChildrenIterator().next());
assertThrows(IllegalStateException.class, () -> starTreeNode.getChildrenIterator().next());
assertThrows(UnsupportedOperationException.class, () -> starTreeNode.getChildrenIterator().remove());

dataIn.close();
directory.close();
}

/**
 * Iterates a range from 0 up to the dimension value of the in-memory child stored under key
 * {@code size() - 3}, and checks that every returned node lies inside the inclusive bounds.
 */
public void testRangeWithinBounds() throws IOException {
    final long lowerBound = 0;
    final long upperBound = node.getChildren().get((long) (node.getChildren().size() - 3)).getDimensionValue();

    int matched = 0;
    for (Iterator<FixedLengthStarTreeNode> it = starTreeNode.range(lowerBound, upperBound); it.hasNext(); matched++) {
        FixedLengthStarTreeNode child = it.next();
        assertNotNull(child);
        assertTrue(child.getDimensionValue() >= lowerBound);
        assertTrue(child.getDimensionValue() <= upperBound);
    }

    // NOTE(review): expects two fewer nodes than the in-memory child count; presumably the star and
    // null children are the ones excluded - confirm against the fixture set-up.
    assertEquals(node.getChildren().size() - 2, matched);
}

/**
 * Queries a 100-wide range that starts 1000 above the parent node's own dimension value and expects
 * the iterator to be empty.
 */
public void testRangeOutsideBounds() throws IOException {
    // NOTE(review): assumes no child has a dimension value this large - confirm against the fixture.
    final long lowerBound = node.getDimensionValue() + 1000;
    final long upperBound = lowerBound + 100;

    Iterator<FixedLengthStarTreeNode> matches = starTreeNode.range(lowerBound, upperBound);

    assertFalse(matches.hasNext());
}

/**
 * Queries the single-value range [-2, -2] and expects the iterator to be empty.
 */
public void testEmptyRange() throws IOException {
    // NOTE(review): assumes no child carries the dimension value -2 - confirm against the fixture.
    final long onlyValue = -2;

    Iterator<FixedLengthStarTreeNode> matches = starTreeNode.range(onlyValue, onlyValue);

    assertFalse(matches.hasNext());
}

/**
 * Iterates a range from 0 up to the dimension value of the in-memory child stored under key
 * {@code size() - 3}, checking the bounds of every returned node and the total number returned.
 */
public void testRangeWithOnlySomeNodes() throws IOException {
    // NOTE(review): these bounds are the same as in testRangeWithinBounds - confirm a narrower range was not intended.
    final long from = 0;
    final long to = node.getChildren().get((long) node.getChildren().size() - 3).getDimensionValue();

    Iterator<FixedLengthStarTreeNode> matches = starTreeNode.range(from, to);

    int seen = 0;
    while (matches.hasNext()) {
        FixedLengthStarTreeNode child = matches.next();
        assertNotNull(child);
        assertTrue(child.getDimensionValue() >= from);
        assertTrue(child.getDimensionValue() <= to);
        seen += 1;
    }

    // The number of nodes returned must be two fewer than the in-memory child count.
    assertEquals(node.getChildren().size() - 2, seen);
}

/**
 * Queries an inverted range (start 10, end 5) and expects the iterator to be empty.
 */
public void testInvalidRange() throws IOException {
    final long lowerBound = 10;
    final long upperBound = 5; // deliberately below lowerBound

    Iterator<FixedLengthStarTreeNode> matches = starTreeNode.range(lowerBound, upperBound);

    assertFalse(matches.hasNext());
}

public void tearDown() throws Exception {
super.tearDown();
dataIn.close();
Expand Down
Loading