Skip to content

Commit

Permalink
Merge pull request #406 from parsingdata/#405_concatenatedvaluesource…
Browse files Browse the repository at this point in the history
…_performance_bug

#405 ConcatenatedValueSource performance bug
  • Loading branch information
mvanaken authored Feb 6, 2024
2 parents fc68dd2 + 504572f commit e223c28
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package io.parsingdata.metal.data;

import static java.math.BigInteger.ZERO;
import static java.math.BigInteger.valueOf;

import static io.parsingdata.metal.Trampoline.complete;
import static io.parsingdata.metal.Trampoline.intermediate;
Expand Down Expand Up @@ -79,13 +80,15 @@ private Trampoline<byte[]> getData(final ImmutableList<Value> values, final BigI
if (length.compareTo(ZERO) <= 0) {
return complete(() -> output);
}
if (currentOffset.add(values.head.slice().length).compareTo(offset) <= 0) {
return intermediate(() -> getData(values.tail, currentOffset.add(values.head.slice().length), currentDest, offset, length, output));
final BigInteger nextOffset = currentOffset.add(values.head.slice().length);
if (nextOffset.compareTo(offset) <= 0) {
return intermediate(() -> getData(values.tail, nextOffset, currentDest, offset, length, output));
}
final BigInteger localOffset = offset.subtract(currentOffset).compareTo(ZERO) < 0 ? ZERO : offset.subtract(currentOffset);
final BigInteger toCopy = length.compareTo(values.head.slice().length.subtract(localOffset)) > 0 ? values.head.slice().length.subtract(localOffset) : length;
System.arraycopy(values.head.slice().getData(), localOffset.intValueExact(), output, currentDest.intValueExact(), toCopy.intValueExact());
return intermediate(() -> getData(values.tail, currentOffset.add(values.head.slice().length), currentDest.add(toCopy), offset, length.subtract(toCopy), output));
// The second argument in getData in Slice is a limit. It will return less if the end of slice is reached.
final byte[] data = values.head.slice().getData(localOffset, length);
System.arraycopy(data, 0, output, currentDest.intValueExact(), data.length);
return intermediate(() -> getData(values.tail, nextOffset, currentDest.add(valueOf(data.length)), offset, length.subtract(valueOf(data.length)), output));
}

@Override
Expand Down
15 changes: 13 additions & 2 deletions core/src/main/java/io/parsingdata/metal/data/Slice.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,19 @@ public byte[] getData() {
}

public byte[] getData(final BigInteger limit) {
final BigInteger calculatedLength = checkNotNegative(limit, "limit").compareTo(length) > 0 ? length : limit;
return source.getData(offset, calculatedLength);
return getData(ZERO, limit);
}

/**
* Return a part of the data specified by the offset and limit.
* @param offset the offset to start reading the slice from
* @param limit the maximum number of bytes returned. Fewer bytes are returned if the end of slice is reached.
* @return a byte array representing the data.
*/
public byte[] getData(final BigInteger offset, final BigInteger limit) {
final BigInteger calculatedOffset = checkNotNegative(offset, "offset").add(this.offset);
final BigInteger calculatedLength = checkNotNegative(limit, "limit").min(length.subtract(offset)).max(ZERO);
return source.getData(calculatedOffset, calculatedLength);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,28 @@

package io.parsingdata.metal.data;

import static java.math.BigInteger.valueOf;

import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;

import static io.parsingdata.metal.data.Slice.createFromSource;
import static io.parsingdata.metal.expression.value.ConstantFactory.createFromBytes;
import static io.parsingdata.metal.util.EncodingFactory.enc;

import java.math.BigInteger;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Random;

import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.Timeout;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;

import io.parsingdata.metal.encoding.Encoding;
import io.parsingdata.metal.expression.value.CoreValue;
import io.parsingdata.metal.expression.value.Value;

Expand Down Expand Up @@ -90,4 +98,37 @@ public void checkData(final String description, final int offset, final int leng
}
}

@Test
@Timeout(value=1)
public void concatenatedValueSourceRead() {
// Create a large array with random data
final int arraySize = 5_120_000;
final byte[] bytes = new byte[arraySize];
new Random().nextBytes(bytes);

// Split the data in separate CoreValues.
final int parts = 4;
ImmutableList<Value> values = new ImmutableList<>();
for (int part = 0; part < parts; part++) {
values = values.add(new CoreValue(Slice.createFromBytes(Arrays.copyOfRange(bytes, (arraySize / parts) * part, (arraySize / parts) * (part + 1))), Encoding.DEFAULT_ENCODING));
}

// Create a ConcatenatedValueSource to read from.
final ConcatenatedValueSource source = ConcatenatedValueSource.create(values).get();

// Read from the source in small parts.
final int readSize = 512;
final byte[] bytesRead = new byte[arraySize];
final long start = System.currentTimeMillis();
for (int part = 0; part < arraySize / readSize; part++) {
final byte[] data = source.getData(valueOf(readSize * part), valueOf(readSize));
System.arraycopy(data, 0, bytesRead, readSize * part, data.length);
}
final long end = System.currentTimeMillis();
System.out.printf("Source read: %ss%n", (end - start) / 1000.0);

// Make sure we read the data correctly.
assertArrayEquals(bytes, bytesRead);
}

}
43 changes: 40 additions & 3 deletions core/src/test/java/io/parsingdata/metal/data/SliceTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import static java.math.BigInteger.ONE;
import static java.math.BigInteger.TEN;
import static java.math.BigInteger.TWO;
import static java.math.BigInteger.ZERO;

import static org.junit.jupiter.api.Assertions.assertArrayEquals;
Expand Down Expand Up @@ -45,13 +46,21 @@
import java.util.Optional;

import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import io.parsingdata.metal.util.InMemoryByteStream;
import io.parsingdata.metal.util.ReadTrackingByteStream;

public class SliceTest {

private static Slice slice;

@BeforeEach
public void setup() {
slice = Slice.createFromSource(new ConstantSource(new byte[]{0, 1, 2, 3}), ZERO, BigInteger.valueOf(4)).get();
}

@Test
public void lazyRead() {
final ReadTrackingByteStream stream = new ReadTrackingByteStream(new InMemoryByteStream(toByteArray(1, 2, 3, 0, 0, 4, 1)));
Expand Down Expand Up @@ -81,16 +90,44 @@ public void lazyLength() {
@Test
public void retrieveDataFromSliceWithNegativeLimit() {
final Exception e = Assertions.assertThrows(IllegalArgumentException.class, () ->
Slice.createFromSource(new ConstantSource(new byte[] { 0, 1, 2, 3 }), ZERO, BigInteger.valueOf(4)).get().getData(BigInteger.valueOf(-1))
slice.getData(BigInteger.valueOf(-1))
);
assertEquals("Argument limit may not be negative.", e.getMessage());
}

@Test
public void retrieveDataFromSliceWithNegativeOffset() {
final Exception e = Assertions.assertThrows(IllegalArgumentException.class, () ->
slice.getData(BigInteger.valueOf(-1), ONE)
);
assertEquals("Argument offset may not be negative.", e.getMessage());
}

@Test
public void retrieveDataFromSliceWithNegativeLimitAndOffset() {
final Exception e = Assertions.assertThrows(IllegalArgumentException.class, () ->
slice.getData(BigInteger.valueOf(-1), BigInteger.valueOf(-1))
);
assertEquals("Argument offset may not be negative.", e.getMessage());
}

@Test
public void retrieveDataFromSliceWithOffsetTooLarge() {
final Exception e = Assertions.assertThrows(IllegalStateException.class, () ->
slice.getData(slice.length.add(ONE), ONE)
);
assertEquals("Data to read is not available ([offset=5;length=0;source=ConstantSource(0x00010203)).", e.getMessage());
}

@Test
public void retrievePartialDataFromSlice() {
assertArrayEquals(new byte[] { 0 }, Slice.createFromSource(new ConstantSource(new byte[] { 0, 1, 2, 3 }), ZERO, BigInteger.valueOf(4)).get().getData(ONE));
assertArrayEquals(new byte[] { 0, 1, 2, 3 }, Slice.createFromSource(new ConstantSource(new byte[] { 0, 1, 2, 3 }), ZERO, BigInteger.valueOf(4)).get().getData(TEN));
// Limit within range
assertArrayEquals(new byte[] { 0 }, slice.getData(ONE));
assertArrayEquals(new byte[] { 1, 2 }, slice.getData(ONE, TWO));

// Limit outside range
assertArrayEquals(new byte[] { 0, 1, 2, 3 }, slice.getData(TEN));
assertArrayEquals(new byte[] { 1, 2, 3 }, slice.getData(ONE, TEN));
}

@Test
Expand Down

0 comments on commit e223c28

Please sign in to comment.