Skip to content

Commit

Permalink
supporting range over star-tree file formats
Browse files Browse the repository at this point in the history
Signed-off-by: Sarthak Aggarwal <[email protected]>
  • Loading branch information
sarthakaggarwal97 committed Oct 4, 2024
1 parent aef7eca commit ed619e9
Show file tree
Hide file tree
Showing 2 changed files with 201 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@
*/
package org.opensearch.index.compositeindex.datacube.startree.fileformats.node;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.store.RandomAccessInput;
import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNode;
import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Iterator;
import java.util.NoSuchElementException;

/**
* Fixed Length implementation of {@link StarTreeNode}.
Expand All @@ -36,6 +39,8 @@
*/
public class FixedLengthStarTreeNode implements StarTreeNode {

private static final Logger logger = LogManager.getLogger(FixedLengthStarTreeNode.class);

/**
* Number of integer fields in the serializable data
*/
Expand Down Expand Up @@ -200,7 +205,10 @@ public StarTreeNode getChildForDimensionValue(Long dimensionValue) throws IOExce

StarTreeNode resultStarTreeNode = null;
if (null != dimensionValue) {
resultStarTreeNode = binarySearchChild(dimensionValue);
int resultStarTreeNodeId = binarySearchForDimension(dimensionValue, false);
if (resultStarTreeNodeId != INVALID_ID) {
resultStarTreeNode = new FixedLengthStarTreeNode(in, resultStarTreeNodeId);
}
}
return resultStarTreeNode;
}
Expand All @@ -219,7 +227,7 @@ private FixedLengthStarTreeNode handleStarNode() throws IOException {
/**
* Checks if the given node matches the specified StarTreeNodeType.
*
* @param firstNode The FixedLengthStarTreeNode to check.
* @param firstNode The FixedLengthStarTreeNode to check.
* @param starTreeNodeType The StarTreeNodeType to match against.
* @return The firstNode if its type matches the targetType, null otherwise.
* @throws IOException If an I/O error occurs during the operation.
Expand All @@ -233,68 +241,155 @@ private static FixedLengthStarTreeNode matchStarTreeNodeTypeOrNull(FixedLengthSt
}
}

/**
 * Returns an iterator that walks this node's children by id, from {@code firstChildId}
 * up to and including the id read from {@code LAST_CHILD_ID_OFFSET}.
 * <p>
 * {@code next()} does not consult {@code hasNext()}; any {@link IOException} or
 * {@link RuntimeException} raised while building the child node is rethrown wrapped in an
 * {@link IllegalStateException}. {@code remove()} is not supported.
 *
 * @return an iterator over the child nodes
 * @throws IOException if the last child id cannot be read
 */
@Override
public Iterator<FixedLengthStarTreeNode> getChildrenIterator() throws IOException {
    // Inclusive upper bound, read once when the iterator is created.
    final int lastId = getInt(LAST_CHILD_ID_OFFSET);

    return new Iterator<>() {
        private int cursor = firstChildId;

        @Override
        public boolean hasNext() {
            return cursor <= lastId;
        }

        @Override
        public FixedLengthStarTreeNode next() {
            // The cursor moves forward before the node is built, so it advances even when
            // construction fails.
            final int childId = cursor;
            cursor = childId + 1;
            try {
                return new FixedLengthStarTreeNode(in, childId);
            } catch (IOException | RuntimeException e) {
                throw new IllegalStateException(e);
            }
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    };
}

/**
* Performs a binary search to find a child node with the given dimension value.
* Finds and returns all children with dimension values between the given start and end values.
*
* @param dimensionValue The dimension value to search for
* @return The child node if found, null otherwise
* @param startDimensionValue The start of the range (inclusive)
* @param endDimensionValue The end of the range (inclusive)
* @return An iterator over the child nodes whose dimension values lie within the specified range
* @throws IOException If there's an error reading from the input
*/
private FixedLengthStarTreeNode binarySearchChild(long dimensionValue) throws IOException {
public Iterator<FixedLengthStarTreeNode> range(long startDimensionValue, long endDimensionValue) throws IOException {
return new Iterator<>() {
int currentChildId = binarySearchForDimension(startDimensionValue, true);
final int lastChildId = getInt(LAST_CHILD_ID_OFFSET);
FixedLengthStarTreeNode nextNode = null;

int low = firstChildId;
/**
 * Reports whether another child within {@code [startDimensionValue, endDimensionValue]} is
 * available, staging it in {@code nextNode} for the following {@code next()} call.
 * <p>
 * The call is idempotent: once a node has been staged, repeated calls return {@code true}
 * without advancing until {@code next()} consumes the staged node.
 *
 * @return {@code true} if a node is staged or can be staged, {@code false} otherwise
 * @throws UncheckedIOException if reading a child node fails
 */
@Override
public boolean hasNext() {
    // A node staged by an earlier call has not been consumed yet. Advancing again would
    // overwrite it and silently drop that element from the iteration.
    if (nextNode != null) {
        return true;
    }
    try {
        // Continue iterating while we have valid children
        while (currentChildId != INVALID_ID && currentChildId <= lastChildId) {
            FixedLengthStarTreeNode currentNode = new FixedLengthStarTreeNode(in, currentChildId);
            long currentDimensionValue = currentNode.getDimensionValue();

            // If node exceeds endDimensionValue, we are out of the range
            if (currentDimensionValue > endDimensionValue) {
                return false;
            }

            // If node is within the range, we prepare it as the next node
            if (currentDimensionValue >= startDimensionValue) {
                nextNode = currentNode;
                currentChildId++; // Move to the next child for future iterations
                return true;
            }

            // Otherwise, move to the next child
            currentChildId++;
        }
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
    return false; // No more children in range
}

/**
 * Hands out the node staged by {@code hasNext()}, staging one first if none is pending.
 *
 * @return the next child node in range
 * @throws NoSuchElementException if no node is staged and {@code hasNext()} finds none
 */
@Override
public FixedLengthStarTreeNode next() {
    if (nextNode == null) {
        // Nothing pending: ask hasNext() to stage the next in-range child.
        if (!hasNext()) {
            throw new NoSuchElementException();
        }
    }

    final FixedLengthStarTreeNode staged = nextNode;
    // Clear the slot so the following call has to stage a new node.
    nextNode = null;
    return staged;
}

// Removal is not supported by this iterator: every call throws UnsupportedOperationException.
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
}

/**
 * Performs a binary search over this node's children for a node with the given dimension value.
 * <p>
 * NOTE(review): this span comes from a rendered diff in which replaced and replacement lines appear
 * back to back without +/- markers. Lines tagged "presumably pre-change" below look like the removed
 * side of the diff; confirm against the repository before relying on them.
 *
 * @param dimensionValue The dimension value to search for
 * @param findFirstInRange If true, find the first node >= dimensionValue; otherwise, find exact matches
 * @return The node ID if found, INVALID_ID otherwise
 * @throws IOException If there's an error reading from the input
 */
private int binarySearchForDimension(long dimensionValue, boolean findFirstInRange) throws IOException {

int low = firstChildId;
// if the first child is a star node, increment low to reduce the search space
// NOTE(review): the next line is presumably the pre-change form of the isStarNode(low) check that follows it
if (matchStarTreeNodeTypeOrNull(new FixedLengthStarTreeNode(in, firstChildId), StarTreeNodeType.STAR) != null) {
if (isStarNode(low)) {
low++;
}

int high = getInt(LAST_CHILD_ID_OFFSET);
// if the last child is a null node, decrement high to reduce the search space
// NOTE(review): the next line is presumably the pre-change form of the isNullNode(high) check that follows it
if (matchStarTreeNodeTypeOrNull(new FixedLengthStarTreeNode(in, high), StarTreeNodeType.NULL) != null) {
if (isNullNode(high)) {
high--;
}

// Smallest id seen so far whose dimension value is greater than dimensionValue
int resultId = INVALID_ID;
while (low <= high) {
// Midpoint written as low + (high - low) / 2 rather than (low + high) / 2
int mid = low + (high - low) / 2;
FixedLengthStarTreeNode midNode = new FixedLengthStarTreeNode(in, mid);
long midDimensionValue = midNode.getDimensionValue();

if (midDimensionValue == dimensionValue) {
// NOTE(review): "return midNode;" is presumably the pre-change line; the method now returns an int id
return midNode;
return mid; // Exact match found
} else if (midDimensionValue < dimensionValue) {
low = mid + 1;
} else {
resultId = mid; // Possible candidate for start of range
high = mid - 1;
}
}
// NOTE(review): everything from "return null;" through the closing brace of hasNext() below is presumably
// pre-change code (the old return plus the start of the old getChildrenIterator()) - confirm
return null;
}

@Override
public Iterator<FixedLengthStarTreeNode> getChildrenIterator() throws IOException {
return new Iterator<>() {
private int currentChildId = firstChildId;
private final int lastChildId = getInt(LAST_CHILD_ID_OFFSET);

@Override
public boolean hasNext() {
return currentChildId <= lastChildId;
}
// No exact match: return the range-start candidate only when the caller asked for it
return findFirstInRange ? resultId : INVALID_ID;
}

@Override
public FixedLengthStarTreeNode next() {
try {
return new FixedLengthStarTreeNode(in, currentChildId++);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
/**
 * Tells whether the node stored under the given id is of type {@link StarTreeNodeType#STAR}.
 * <p>
 * A node is constructed for {@code nodeId} and handed to {@code matchStarTreeNodeTypeOrNull};
 * the answer is {@code true} exactly when that helper returns a non-null node.
 *
 * @param nodeId The ID of the node to check
 * @return true if it's a star node, false otherwise
 * @throws IOException If there's an error reading from the input
 */
private boolean isStarNode(int nodeId) throws IOException {
    final FixedLengthStarTreeNode candidate = new FixedLengthStarTreeNode(in, nodeId);
    final FixedLengthStarTreeNode starMatch = matchStarTreeNodeTypeOrNull(candidate, StarTreeNodeType.STAR);
    return starMatch != null;
}

@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
/**
 * Tells whether the node stored under the given id is of type {@link StarTreeNodeType#NULL}.
 * <p>
 * A node is constructed for {@code nodeId} and handed to {@code matchStarTreeNodeTypeOrNull};
 * the answer is {@code true} exactly when that helper returns a non-null node.
 *
 * @param nodeId The ID of the node to check
 * @return true if it's a null node, false otherwise
 * @throws IOException If there's an error reading from the input
 */
private boolean isNullNode(int nodeId) throws IOException {
    final FixedLengthStarTreeNode candidate = new FixedLengthStarTreeNode(in, nodeId);
    final FixedLengthStarTreeNode nullMatch = matchStarTreeNodeTypeOrNull(candidate, StarTreeNodeType.NULL);
    return nullMatch != null;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -220,13 +220,83 @@ public void testOnlyRootNodePresent() throws IOException {

assertEquals(starTreeNode.getNumChildren(), 0);
assertNull(starTreeNode.getChildForDimensionValue(randomLong()));
assertThrows(IllegalArgumentException.class, () -> starTreeNode.getChildrenIterator().next());
assertThrows(IllegalStateException.class, () -> starTreeNode.getChildrenIterator().next());
assertThrows(UnsupportedOperationException.class, () -> starTreeNode.getChildrenIterator().remove());

dataIn.close();
directory.close();
}

/**
 * Iterates {@code range(0, end)} and checks that every returned node lies inside the bounds
 * and that the expected number of nodes comes back.
 * <p>
 * {@code end} is the dimension value of the child stored under key {@code size - 3} in
 * {@code node.getChildren()}, and {@code size - 2} nodes are expected.
 * NOTE(review): the -3 / -2 offsets presumably account for special (star / null) children
 * in the fixture - confirm against setUp().
 */
public void testRangeWithinBounds() throws IOException {
    final long startDimensionValue = 0;
    final int endKey = node.getChildren().size() - 3;
    final long endDimensionValue = node.getChildren().get((long) endKey).getDimensionValue();

    final Iterator<FixedLengthStarTreeNode> inRange = starTreeNode.range(startDimensionValue, endDimensionValue);

    int matched = 0;
    for (; inRange.hasNext(); matched++) {
        final FixedLengthStarTreeNode child = inRange.next();
        assertNotNull(child);
        // Both bounds are inclusive.
        assertTrue(child.getDimensionValue() >= startDimensionValue);
        assertTrue(child.getDimensionValue() <= endDimensionValue);
    }

    assertEquals(node.getChildren().size() - 2, matched);
}

/**
 * Queries a range that starts 1000 above {@code node.getDimensionValue()} and spans 100 values,
 * and expects the iterator to be empty.
 * NOTE(review): this presumes no child of the fixture has a dimension value that high - confirm
 * against setUp().
 */
public void testRangeOutsideBounds() throws IOException {
    final long lowerBound = node.getDimensionValue() + 1000;
    final long upperBound = lowerBound + 100;

    final Iterator<FixedLengthStarTreeNode> inRange = starTreeNode.range(lowerBound, upperBound);

    // Nothing is expected to fall inside [lowerBound, upperBound].
    assertFalse(inRange.hasNext());
}

/**
 * Queries the single-value range {@code [-2, -2]} and expects the iterator to be empty.
 * NOTE(review): this presumes the fixture has no child with dimension value -2 - confirm
 * against setUp().
 */
public void testEmptyRange() throws IOException {
    final long onlyValue = -2;

    final Iterator<FixedLengthStarTreeNode> inRange = starTreeNode.range(onlyValue, onlyValue);

    // Start equals end, and no node is expected at that value.
    assertFalse(inRange.hasNext());
}

/**
 * Iterates {@code range(0, end)}, where {@code end} is the dimension value of the child stored
 * under key {@code size - 3}, and verifies both the bounds of each returned node and the total
 * count ({@code size - 2}).
 * NOTE(review): the inputs and expectations match testRangeWithinBounds; whether this is meant
 * to exercise a strict subset of the children cannot be told from here - confirm against setUp().
 */
public void testRangeWithOnlySomeNodes() throws IOException {
    final long startDimensionValue = 0;
    final long endKey = node.getChildren().size() - 3L;
    final long endDimensionValue = node.getChildren().get(endKey).getDimensionValue();

    final Iterator<FixedLengthStarTreeNode> inRange = starTreeNode.range(startDimensionValue, endDimensionValue);

    int matched = 0;
    while (inRange.hasNext()) {
        final FixedLengthStarTreeNode child = inRange.next();
        assertNotNull(child);
        // Both bounds are inclusive.
        assertTrue(child.getDimensionValue() >= startDimensionValue);
        assertTrue(child.getDimensionValue() <= endDimensionValue);
        matched += 1;
    }

    // The number of nodes returned must match the expectation for this fixture.
    assertEquals(node.getChildren().size() - 2, matched);
}

/**
 * Queries an inverted range (start 10, end 5) and expects the iterator to be empty
 * rather than an exception.
 */
public void testInvalidRange() throws IOException {
    final long lowerBound = 10;
    final long upperBound = 5; // deliberately below lowerBound

    final Iterator<FixedLengthStarTreeNode> inRange = starTreeNode.range(lowerBound, upperBound);

    // An inverted range yields nothing.
    assertFalse(inRange.hasNext());
}

public void tearDown() throws Exception {
super.tearDown();
dataIn.close();
Expand Down

0 comments on commit ed619e9

Please sign in to comment.