Skip to content

Commit

Permalink
#405: Fix reading data from large concatenated values.
Browse files Browse the repository at this point in the history
Previously, `values.head.slice().getData()` reads all the data in the
value and then copied an explicit part from it. In case you read small
blocks from a large value, this is very inefficient.

Added a getData variant to Slice class that allows specifiying the
offset next to the limit.
  • Loading branch information
mvanaken committed Nov 28, 2023
1 parent 0c01be7 commit 14cd4ca
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package io.parsingdata.metal.data;

import static java.math.BigInteger.ZERO;
import static java.math.BigInteger.valueOf;

import static io.parsingdata.metal.Trampoline.complete;
import static io.parsingdata.metal.Trampoline.intermediate;
Expand Down Expand Up @@ -79,13 +80,15 @@ private Trampoline<byte[]> getData(final ImmutableList<Value> values, final BigI
if (length.compareTo(ZERO) <= 0) {
return complete(() -> output);
}
if (currentOffset.add(values.head.slice().length).compareTo(offset) <= 0) {
return intermediate(() -> getData(values.tail, currentOffset.add(values.head.slice().length), currentDest, offset, length, output));
final BigInteger nextOffset = currentOffset.add(values.head.slice().length);
if (nextOffset.compareTo(offset) <= 0) {
return intermediate(() -> getData(values.tail, nextOffset, currentDest, offset, length, output));
}
final BigInteger localOffset = offset.subtract(currentOffset).compareTo(ZERO) < 0 ? ZERO : offset.subtract(currentOffset);
final BigInteger toCopy = length.compareTo(values.head.slice().length.subtract(localOffset)) > 0 ? values.head.slice().length.subtract(localOffset) : length;
System.arraycopy(values.head.slice().getData(), localOffset.intValueExact(), output, currentDest.intValueExact(), toCopy.intValueExact());
return intermediate(() -> getData(values.tail, currentOffset.add(values.head.slice().length), currentDest.add(toCopy), offset, length.subtract(toCopy), output));
// The second argument in getData in Slice is a limit. It will return less if the end of slice is reached.
final byte[] data = values.head.slice().getData(localOffset, length);
System.arraycopy(data, 0, output, currentDest.intValueExact(), data.length);
return intermediate(() -> getData(values.tail, nextOffset, currentDest.add(valueOf(data.length)), offset, length.subtract(valueOf(data.length)), output));
}

@Override
Expand Down
15 changes: 13 additions & 2 deletions core/src/main/java/io/parsingdata/metal/data/Slice.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,19 @@ public byte[] getData() {
}

public byte[] getData(final BigInteger limit) {
final BigInteger calculatedLength = checkNotNegative(limit, "limit").compareTo(length) > 0 ? length : limit;
return source.getData(offset, calculatedLength);
return getData(ZERO, limit);
}

/**
* Return a part of the data specified by the offset and limit.
* @param offset the offset to start reading the slice from
* @param limit the maximum number of bytes returned. Less bytes are returned if the end of slice is reached.
* @return a byte array representing the data.
*/
public byte[] getData(final BigInteger offset, final BigInteger limit) {
final BigInteger calculatedOffset = checkNotNegative(offset, "offset").add(this.offset);
final BigInteger calculatedLength = checkNotNegative(limit, "limit").min(length.subtract(offset)).max(ZERO);
return source.getData(calculatedOffset, calculatedLength);
}

@Override
Expand Down
43 changes: 40 additions & 3 deletions core/src/test/java/io/parsingdata/metal/data/SliceTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import static java.math.BigInteger.ONE;
import static java.math.BigInteger.TEN;
import static java.math.BigInteger.TWO;
import static java.math.BigInteger.ZERO;

import static org.junit.jupiter.api.Assertions.assertArrayEquals;
Expand Down Expand Up @@ -45,13 +46,21 @@
import java.util.Optional;

import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import io.parsingdata.metal.util.InMemoryByteStream;
import io.parsingdata.metal.util.ReadTrackingByteStream;

public class SliceTest {

private static Slice slice;

@BeforeEach
public void setup() {
slice = Slice.createFromSource(new ConstantSource(new byte[]{0, 1, 2, 3}), ZERO, BigInteger.valueOf(4)).get();
}

@Test
public void lazyRead() {
final ReadTrackingByteStream stream = new ReadTrackingByteStream(new InMemoryByteStream(toByteArray(1, 2, 3, 0, 0, 4, 1)));
Expand Down Expand Up @@ -81,16 +90,44 @@ public void lazyLength() {
@Test
public void retrieveDataFromSliceWithNegativeLimit() {
final Exception e = Assertions.assertThrows(IllegalArgumentException.class, () ->
Slice.createFromSource(new ConstantSource(new byte[] { 0, 1, 2, 3 }), ZERO, BigInteger.valueOf(4)).get().getData(BigInteger.valueOf(-1))
slice.getData(BigInteger.valueOf(-1))
);
assertEquals("Argument limit may not be negative.", e.getMessage());
}

@Test
public void retrieveDataFromSliceWithNegativeOffset() {
final Exception e = Assertions.assertThrows(IllegalArgumentException.class, () ->
slice.getData(BigInteger.valueOf(-1), ONE)
);
assertEquals("Argument offset may not be negative.", e.getMessage());
}

@Test
public void retrieveDataFromSliceWithNegativeLimitAndOffset() {
final Exception e = Assertions.assertThrows(IllegalArgumentException.class, () ->
slice.getData(BigInteger.valueOf(-1), BigInteger.valueOf(-1))
);
assertEquals("Argument offset may not be negative.", e.getMessage());
}

@Test
public void retrieveDataFromSliceWithOffsetTooLarge() {
final Exception e = Assertions.assertThrows(IllegalStateException.class, () ->
slice.getData(slice.length.add(ONE), ONE)
);
assertEquals("Data to read is not available ([offset=5;length=0;source=ConstantSource(0x00010203)).", e.getMessage());
}

@Test
public void retrievePartialDataFromSlice() {
assertArrayEquals(new byte[] { 0 }, Slice.createFromSource(new ConstantSource(new byte[] { 0, 1, 2, 3 }), ZERO, BigInteger.valueOf(4)).get().getData(ONE));
assertArrayEquals(new byte[] { 0, 1, 2, 3 }, Slice.createFromSource(new ConstantSource(new byte[] { 0, 1, 2, 3 }), ZERO, BigInteger.valueOf(4)).get().getData(TEN));
// Limit within range
assertArrayEquals(new byte[] { 0 }, slice.getData(ONE));
assertArrayEquals(new byte[] { 1, 2 }, slice.getData(ONE, TWO));

// Limit outside range
assertArrayEquals(new byte[] { 0, 1, 2, 3 }, slice.getData(TEN));
assertArrayEquals(new byte[] { 1, 2, 3 }, slice.getData(ONE, TEN));
}

@Test
Expand Down

0 comments on commit 14cd4ca

Please sign in to comment.