From 3ca65dcb9ab24421d105cee9e55b312781e261f7 Mon Sep 17 00:00:00 2001 From: Marja van Aken Date: Wed, 11 Sep 2024 15:20:21 +0200 Subject: [PATCH] Add cache for tokens to the ParseValueCache. May need a rename of the class since it now caches more than parse values. --- .../io/parsingdata/metal/data/ParseState.java | 3 +- .../metal/data/ParseValueCache.java | 34 ++++++++++--- .../io/parsingdata/metal/token/TokenRef.java | 3 +- .../metal/data/ParseValueCacheTest.java | 51 +++++++++++++++++++ 4 files changed, 81 insertions(+), 10 deletions(-) diff --git a/core/src/main/java/io/parsingdata/metal/data/ParseState.java b/core/src/main/java/io/parsingdata/metal/data/ParseState.java index 0c368070..5bf8deba 100644 --- a/core/src/main/java/io/parsingdata/metal/data/ParseState.java +++ b/core/src/main/java/io/parsingdata/metal/data/ParseState.java @@ -61,9 +61,8 @@ public static ParseState createFromByteStream(final ByteStream input) { } public ParseState addBranch(final Token token) { - return new ParseState(order.addBranch(token), cache, source, offset, token.isIterable() ? iterations.add(new ImmutablePair<>(token, ZERO)) : iterations, references); + return new ParseState(order.addBranch(token), cache.add(token), source, offset, token.isIterable() ? iterations.add(new ImmutablePair<>(token, ZERO)) : iterations, references); } - public ParseState closeBranch(final Token token) { if (token.isIterable() && !iterations.head.left.equals(token)) { throw new IllegalStateException(format("Cannot close branch for iterable token %s. 
Current iteration state is for token %s.", token.name, iterations.head.left.name)); diff --git a/core/src/main/java/io/parsingdata/metal/data/ParseValueCache.java b/core/src/main/java/io/parsingdata/metal/data/ParseValueCache.java index 50dc52b5..aa2d727e 100644 --- a/core/src/main/java/io/parsingdata/metal/data/ParseValueCache.java +++ b/core/src/main/java/io/parsingdata/metal/data/ParseValueCache.java @@ -17,9 +17,10 @@ public class ParseValueCache { - public static final ParseValueCache NO_CACHE = new ParseValueCache(null); + public static final ParseValueCache NO_CACHE = new ParseValueCache(null, null); private final Map> cache; + private final Map tokenCache; /** * Start a cache that keeps track of values added to the parse graph. @@ -27,12 +28,13 @@ public class ParseValueCache { * In case no caching is desired, {@link #NO_CACHE} should be used instead. */ public ParseValueCache() { - this(new HashMap<>()); + this(new HashMap<>(), new HashMap<>()); } // For internal use only. It is private to avoid setting the cache to null. The NO_CACHE constant should be used instead. 
- private ParseValueCache(final Map> cache) { + private ParseValueCache(final Map> cache, final Map tokenCache) { this.cache = cache; + this.tokenCache = tokenCache; } public Optional> find(final String scopeName, int limit) { @@ -59,10 +61,23 @@ public ParseValueCache add(final ParseValue value) { return NO_CACHE; } final String name = shortName(value.name); - final Map> stringImmutableListHashMap = new HashMap<>(cache); - stringImmutableListHashMap.computeIfAbsent(name, pattern -> new ImmutableList<>()); - stringImmutableListHashMap.computeIfPresent(name, (pattern, valueImmutableList) -> valueImmutableList.add(value)); - return new ParseValueCache(stringImmutableListHashMap); + final Map> newCache = new HashMap<>(cache); + newCache.computeIfAbsent(name, pattern -> new ImmutableList<>()); + newCache.computeIfPresent(name, (pattern, valueImmutableList) -> valueImmutableList.add(value)); + return new ParseValueCache(newCache, tokenCache); + } + + public ParseValueCache add(final Token token) { + if (this == NO_CACHE) { + return NO_CACHE; + } + final String name = token.name; + if (name.isEmpty()) { + return this; + } + final Map newTokenCache = new HashMap<>(tokenCache); + newTokenCache.put(name, token); + return new ParseValueCache(cache, newTokenCache); } private static String shortName(final String name) { @@ -87,4 +102,9 @@ public boolean equals(final Object obj) { public int hashCode() { return Objects.hash(cache); } + + public Optional findToken(String referenceName) { + System.out.println("ParseValueCache.findToken"); + return Optional.ofNullable(tokenCache.get(referenceName)); + } } diff --git a/core/src/main/java/io/parsingdata/metal/token/TokenRef.java b/core/src/main/java/io/parsingdata/metal/token/TokenRef.java index 28c7b161..063db035 100644 --- a/core/src/main/java/io/parsingdata/metal/token/TokenRef.java +++ b/core/src/main/java/io/parsingdata/metal/token/TokenRef.java @@ -64,7 +64,8 @@ public TokenRef(final String name, final String referenceName, 
final Encoding en @Override protected Optional parseImpl(final Environment environment) { - return lookup(ImmutableList.create(environment.parseState.order), referenceName).computeResult().parse(environment); +// return lookup(ImmutableList.create(environment.parseState.order), referenceName).computeResult().parse(environment); + return environment.parseState.cache.findToken(referenceName).orElse(LOOKUP_FAILED).parse(environment); } private Trampoline lookup(final ImmutableList items, final String referenceName) { diff --git a/core/src/test/java/io/parsingdata/metal/data/ParseValueCacheTest.java b/core/src/test/java/io/parsingdata/metal/data/ParseValueCacheTest.java index 937ade63..58c9666e 100644 --- a/core/src/test/java/io/parsingdata/metal/data/ParseValueCacheTest.java +++ b/core/src/test/java/io/parsingdata/metal/data/ParseValueCacheTest.java @@ -1,9 +1,23 @@ package io.parsingdata.metal.data; +import static io.parsingdata.metal.Shorthand.CURRENT_ITERATION; +import static io.parsingdata.metal.Shorthand.eqNum; +import static io.parsingdata.metal.Shorthand.last; +import static io.parsingdata.metal.Shorthand.nod; +import static io.parsingdata.metal.Shorthand.opt; +import static io.parsingdata.metal.Shorthand.rep; +import static io.parsingdata.metal.Shorthand.repn; import static io.parsingdata.metal.Shorthand.seq; +import static io.parsingdata.metal.Shorthand.tie; +import static io.parsingdata.metal.Shorthand.token; +import static io.parsingdata.metal.Shorthand.when; +import static io.parsingdata.metal.data.ParseState.createFromByteStream; +import static io.parsingdata.metal.util.EnvironmentFactory.env; import static io.parsingdata.metal.util.TokenDefinitions.any; import static java.math.BigInteger.ZERO; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static 
org.junit.jupiter.api.Assertions.assertNotNull; @@ -20,12 +34,15 @@ import static io.parsingdata.metal.data.Slice.createFromBytes; import static io.parsingdata.metal.util.EncodingFactory.enc; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.stream.Stream; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; @@ -34,6 +51,7 @@ import io.parsingdata.metal.expression.value.Value; import io.parsingdata.metal.expression.value.ValueExpression; import io.parsingdata.metal.token.Token; +import io.parsingdata.metal.util.InMemoryByteStream; class ParseValueCacheTest { @@ -189,4 +207,37 @@ public void cacheUsageTest(final String testName, final ValueExpression expressi // That means, if result is not empty, the cache was used. assertEquals(shouldUseCache, !eval.isEmpty()); } + + // Note: This timeout does not stop the test once the timeout has elapsed. + // The test will run until it finishes and then validate the duration. + @Timeout(value = 50) + @Test + void performanceTest() { + // This test would take way too much time without tokenref caching (~17sec). + // Using tokenref caching, these are all finished within less than 100 ms. + final int dataSize = 1_000_000; + final byte[] input = new byte[dataSize + 2 + 3]; + // This token contains recursive tokens to create large ParseGraphs. 
+ final Token deep = + seq( + seq("tokenref", + def("data1", 1), + def("data2", 1) + ), + rep("token", + seq("seq", + seq( + def("byte", 1), + nod(0) + ), + when(token("tokenref"), eqNum(CURRENT_ITERATION, con(dataSize))) + ) + ) + ); + final Optional result = deep.parse(env(createFromByteStream(new InMemoryByteStream(input)))); + assertTrue(result.isPresent()); + + ImmutableList allValues = Selection.getAllValues(result.get().order, x -> true); + assertThat(allValues.size, equalTo(dataSize + 5L)); + } } \ No newline at end of file