From 33ad3902fd580c5b83bcbe9b765f081f66c723e9 Mon Sep 17 00:00:00 2001
From: barbosf
+ * Path extractor takes registered paths and when it finds one during stream processing invokes the respective callback.
+ * This allows the Ion reader to plan the most efficient traversal over the data without requiring further manual
+ * interaction from the user.
+ *
+ * For example, there is no reason to step in to containers which could not possibly match one of the search paths. When
+ * encoded in binary Ion, the resulting skip is a seek forward in the input stream, which is inexpensive relative to the
+ * cost of parsing (and in the case of a DOM, materializing) the skipped value.
+ *
+ * The callback receives the matcher's {@link IonReader}, positioned on the matching value, so that it can use the
+ * appropriate reader method to access the value. The callback return value is as a ‘step-out-N’ instruction.
+ * The most common value is zero, which tells the extractor to continue with the next value at the same depth.
+ * A return value greater than zero may be useful to users who only care about the first match at a particular
+ * depth
+ *
+ * Callback implementations MUST comply with the following:
+ * > stack;
+
+ Tracker(final int size) {
+ stack = new ArrayDeque<>(size);
+ }
+
+ void reset(final List
+ * defaults to false.
+ *
+ * @param matchRelativePaths new config value.
+ * @return builder for chaining.
+ */
+ public PathExtractorBuilder withMatchRelativePaths(final boolean matchRelativePaths) {
+ this.matchRelativePaths = matchRelativePaths;
+
+ return this;
+ }
+
+ /**
+ * Sets matchCaseInsensitive config. When true the path extractor will match fields ignoring case, when false the
+ * path extractor will match respecting the path components' case.
+ *
+ *
+ * defaults to false.
+ *
+ * @param matchCaseInsensitive new config value.
+ * @return builder for chaining.
+ */
+ public PathExtractorBuilder withMatchCaseInsensitive(final boolean matchCaseInsensitive) {
+ this.matchCaseInsensitive = matchCaseInsensitive;
+
+ return this;
+ }
+
+ /**
+ * Register a callback for a search path.
+ *
+ * @param searchExpressionAsIon string representation of a search path.
+ * @param callback callback to be registered.
+ * @return builder for chaining.
+ * @see PathExtractorBuilder#register(List, Function)
+ */
+ public PathExtractorBuilder register(final String searchExpressionAsIon,
+ final Function
+ *
+ *
+ * data: {foo: [1,2,3], bar: { baz: [1] }} + * + * search path | callback invoked with reader at + * ------------|-------------------- + * (0) | [1, 2, 3] + * (0 2) | 3 + *+ */ +public class Index implements PathComponent { + + private final int ordinal; + + /** + * Constructor. + * + * @param ordinal component ordinal. + */ + public Index(final int ordinal) { + this.ordinal = ordinal; + } + + @Override + public boolean matches(final IonReader reader, final int currentPosition, final PathExtractorConfig config) { + return ordinal == currentPosition; + } +} diff --git a/src/main/java/software/amazon/com/ionpathextraction/pathcomponents/PathComponent.java b/src/main/java/software/amazon/com/ionpathextraction/pathcomponents/PathComponent.java new file mode 100644 index 0000000..8318475 --- /dev/null +++ b/src/main/java/software/amazon/com/ionpathextraction/pathcomponents/PathComponent.java @@ -0,0 +1,40 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at: + * + * http://aws.amazon.com/apache2.0/ + * + * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific + * language governing permissions and limitations under the License. + */ + +package software.amazon.com.ionpathextraction.pathcomponents; + +import software.amazon.com.ionpathextraction.PathExtractor; +import software.amazon.com.ionpathextraction.PathExtractorConfig; +import software.amazon.ion.IonReader; + +/** + * A search path component, for example the path (foo * 1) has three components. + * + *
+ * data: {foo: [1,2,3], bar: { baz: [1] }} + * + * search path | callback invoked with reader at + * ------------|-------------------- + * (foo) | [1, 2, 3] + * (bar baz) | [1] + *+ */ +public class Text implements PathComponent { + + private final String fieldName; + + /** + * Constructor. + * + * @param fieldName component field name. + */ + public Text(final String fieldName) { + checkArgument(fieldName != null, "fieldName cannot be null"); + + this.fieldName = fieldName; + } + + @Override + public boolean matches(final IonReader reader, final int currentPosition, final PathExtractorConfig config) { + if (!reader.isInStruct()) { + return false; + } + + return config.isMatchCaseInsensitive() + ? fieldName.equalsIgnoreCase(reader.getFieldName()) + : fieldName.equals(reader.getFieldName()); + } +} diff --git a/src/main/java/software/amazon/com/ionpathextraction/pathcomponents/Wildcard.java b/src/main/java/software/amazon/com/ionpathextraction/pathcomponents/Wildcard.java new file mode 100644 index 0000000..6c42d47 --- /dev/null +++ b/src/main/java/software/amazon/com/ionpathextraction/pathcomponents/Wildcard.java @@ -0,0 +1,47 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at: + * + * http://aws.amazon.com/apache2.0/ + * + * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific + * language governing permissions and limitations under the License. + */ + +package software.amazon.com.ionpathextraction.pathcomponents; + +import software.amazon.com.ionpathextraction.PathExtractorConfig; +import software.amazon.ion.IonReader; + +/** + * Wildcard path component matches any value, example. + *
+ * data: {foo: [1,2,3], bar: { baz: [1] }} + * + * search path | callback invoked with reader at + * ------------|-------------------- + * (*) | [1, 2, 3] and { baz: [1] } + * (* *) | 1, 2, 3 and [1] + *+ */ +public class Wildcard implements PathComponent { + + public static final String TEXT = "*"; + + /** + * Singleton {@link Wildcard} instance. + */ + public static final Wildcard INSTANCE = new Wildcard(); + + /** use INSTANCE. */ + private Wildcard() { + } + + @Override + public boolean matches(final IonReader reader, final int currentPosition, final PathExtractorConfig config) { + return true; + } +} diff --git a/src/main/java/software/amazon/com/ionpathextraction/utils/Preconditions.java b/src/main/java/software/amazon/com/ionpathextraction/utils/Preconditions.java new file mode 100644 index 0000000..1226e80 --- /dev/null +++ b/src/main/java/software/amazon/com/ionpathextraction/utils/Preconditions.java @@ -0,0 +1,48 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at: + * + * http://aws.amazon.com/apache2.0/ + * + * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific + * language governing permissions and limitations under the License. + */ + +package software.amazon.com.ionpathextraction.utils; + +import software.amazon.com.ionpathextraction.exceptions.PathExtractionException; + +/** + * Precondition check helper. + */ +public class Preconditions { + + /** + * Validates argument, fails if condition is not met. + * + * @param isValid if condition is met. + * @param message error message. + * @throws PathExtractionException if not valid. 
+ */ + public static void checkArgument(final Boolean isValid, final String message) { + if (!isValid) { + throw new PathExtractionException(message); + } + } + + /** + * Validates a state, fails if condition is not met. + * + * @param isValid if condition is met. + * @param message error message. + * @throws PathExtractionException if not valid. + */ + public static void checkState(final Boolean isValid, final String message) { + if (!isValid) { + throw new PathExtractionException(message); + } + } +} diff --git a/src/test/kotlin/software/amazon/com/ionpathextraction/PathExtractorTest.kt b/src/test/kotlin/software/amazon/com/ionpathextraction/PathExtractorTest.kt new file mode 100644 index 0000000..947efa6 --- /dev/null +++ b/src/test/kotlin/software/amazon/com/ionpathextraction/PathExtractorTest.kt @@ -0,0 +1,205 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at: + * + * http://aws.amazon.com/apache2.0/ + * + * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific + * language governing permissions and limitations under the License. 
+ */ + +package software.amazon.com.ionpathextraction + +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.assertAll +import org.junit.jupiter.api.assertThrows +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.MethodSource +import software.amazon.com.ionpathextraction.exceptions.PathExtractionException +import software.amazon.com.ionpathextraction.pathcomponents.PathComponent +import software.amazon.ion.* +import software.amazon.ion.system.IonSystemBuilder +import java.io.File +import java.util.stream.Stream +import kotlin.test.assertTrue + +class PathExtractorTest { + companion object { + private val ION = IonSystemBuilder.standard().build() + + data class TestCase(val searchPaths: List
+ * WARNING: Implementations of this interface are not required to be Thread safe.
*/ -public class PathExtractor { - - private final PathExtractorConfig config; - private final Tracker tracker; - - private final List+ * Default implementation of {@link PathExtractor}. + *
+ *+ * WARNING: not Thread safe. + *
+ */ +class PathExtractorImpl implements PathExtractor { + + private final PathExtractorConfig config; + private final Tracker tracker; + + private final List- * The callback receives the matcher's {@link IonReader}, positioned on the matching value, so that it can use the - * appropriate reader method to access the value. The callback return value is as a ‘step-out-N’ instruction. - * The most common value is zero, which tells the extractor to continue with the next value at the same depth. - * A return value greater than zero may be useful to users who only care about the first match at a particular - * depth + * The callback receives the matcher's {@link IonReader}, positioned on the matching value, so that it can use + * the appropriate reader method to access the value. The callback return value is as a ‘step-out-N’ + * instruction. The most common value is zero, which tells the extractor to continue with the next value at the + * same depth. A return value greater than zero may be useful to users who only care about the first match at a + * particular depth *
+ * *- * Callback implementations MUST comply with the following: + * Callback implementations MUST comply with the following: *
+ * *- * Default implementation of {@link PathExtractor}. + * Default implementation of {@link PathExtractor}. *
*- * WARNING: not Thread safe. + * WARNING: not Thread safe. *
*/ class PathExtractorImpl implements PathExtractor { @@ -65,12 +65,14 @@ public void match(final IonReader reader) { "reader must be at depth zero, it was at:" + reader.getDepth()); // short circuit when there are zero SearchPaths - if(searchPaths.isEmpty()) { + if (searchPaths.isEmpty()) { return; } // marks all search paths as active tracker.reset(searchPaths); + tracker.setInitialReaderDepth(reader.getDepth()); + matchRecursive(reader); } @@ -134,6 +136,16 @@ private int invokeCallback(final IonReader reader, final SearchPath searchPath) + ", new: " + newReaderDepth); + // we don't allow users to step out the initial reader depth + int readerRelativeDepth = reader.getDepth() - tracker.getInitialReaderDepth(); + + checkState(stepOutTimes <= readerRelativeDepth, + "Callback return cannot be greater than the reader current relative depth." + + " return: " + + stepOutTimes + + ", relative reader depth: " + + readerRelativeDepth); + return stepOutTimes; } @@ -176,6 +188,7 @@ private boolean isTerminal(final SearchPath searchPath) { private static class Tracker { private final Deque* The callback receives the matcher's {@link IonReader}, positioned on the matching value, so that it can use - * the appropriate reader method to access the value. The callback return value is as a ‘step-out-N’ - * instruction. The most common value is zero, which tells the extractor to continue with the next value at the - * same depth. A return value greater than zero may be useful to users who only care about the first match at a - * particular depth + * the appropriate reader method to access the value. The callback return value is a ‘step-out-N’ instruction. + * The most common value is zero, which tells the extractor to continue with the next value at the same depth. A + * return value greater than zero may be useful to users who only care about the first match at a particular + * depth. *
* *
diff --git a/src/main/java/software/amazon/com/ionpathextraction/PathExtractorImpl.java b/src/main/java/software/amazon/com/ionpathextraction/PathExtractorImpl.java
index 8908b38..9321a95 100644
--- a/src/main/java/software/amazon/com/ionpathextraction/PathExtractorImpl.java
+++ b/src/main/java/software/amazon/com/ionpathextraction/PathExtractorImpl.java
@@ -51,12 +51,12 @@ class PathExtractorImpl implements PathExtractor {
this.callbacks = callbacks;
this.config = config;
- int size = searchPaths.stream()
- .mapToInt(SearchPath::getId)
+ int maxSearchPathDepth = searchPaths.stream()
+ .mapToInt(sp -> sp.getPathComponents().size())
.max()
.orElse(0);
- tracker = new Tracker(size);
+ tracker = new Tracker(maxSearchPathDepth);
}
@Override
diff --git a/src/main/java/software/amazon/com/ionpathextraction/SearchPath.java b/src/main/java/software/amazon/com/ionpathextraction/SearchPath.java
index 29bdbda..3ff5194 100644
--- a/src/main/java/software/amazon/com/ionpathextraction/SearchPath.java
+++ b/src/main/java/software/amazon/com/ionpathextraction/SearchPath.java
@@ -13,14 +13,8 @@
package software.amazon.com.ionpathextraction;
-import static software.amazon.com.ionpathextraction.utils.Preconditions.checkArgument;
-
import java.util.List;
-import java.util.function.Function;
import software.amazon.com.ionpathextraction.pathcomponents.PathComponent;
-import software.amazon.ion.IonReader;
-
-// FIXME this is a weird class, probably better to remove it
/**
* A path which is provided to the extractor for matching.
From 9788b3a7ace2ad3beec429fcde15675a90ac992a Mon Sep 17 00:00:00 2001
From: barbosf
@@ -154,15 +155,7 @@ private boolean needsToStepIn(final IonReader reader, final boolean hasTerminalM
return false;
}
- switch (reader.getType()) {
- case LIST:
- case SEXP:
- case STRUCT:
- case DATAGRAM:
- return true;
- }
-
- return false;
+ return IonType.isContainer(reader.getType());
}
private boolean pathComponentMatches(final SearchPath searchPath,
@@ -219,7 +212,7 @@ void setInitialReaderDepth(final int depth) {
initialReaderDepth = depth;
}
- public int getInitialReaderDepth() {
+ int getInitialReaderDepth() {
return initialReaderDepth;
}
}
From ae5525c4a913818ae20fa150da03361b976b6d5e Mon Sep 17 00:00:00 2001
From: barbosf
+}
+
+multiple search paths:
+{
+ searchPaths:
,
+ data: