From 33ad3902fd580c5b83bcbe9b765f081f66c723e9 Mon Sep 17 00:00:00 2001 From: barbosf Date: Tue, 9 Oct 2018 10:59:28 -0700 Subject: [PATCH 01/13] PathExtraction initinal implementaion * Path Extraction implementation that exposes the IonReader to registered callbacks * Package setup, e.g. gradle, checkstyle, copyright --- .gitignore | 8 + README.md | 53 ++ build.gradle | 72 +++ config/README.md | 2 + config/checkstyle/checkstyle.xml | 264 +++++++++ config/checkstyle/copyright-header | 12 + config/intellij/codestyle.xml | 522 ++++++++++++++++++ settings.gradle | 15 + .../PathComponentParser.java | 107 ++++ .../com/ionpathextraction/PathExtractor.java | 207 +++++++ .../PathExtractorBuilder.java | 160 ++++++ .../PathExtractorConfig.java | 33 ++ .../com/ionpathextraction/SearchPath.java | 45 ++ .../exceptions/PathExtractionException.java | 32 ++ .../pathcomponents/Index.java | 47 ++ .../pathcomponents/PathComponent.java | 40 ++ .../pathcomponents/Text.java | 57 ++ .../pathcomponents/Wildcard.java | 47 ++ .../utils/Preconditions.java | 48 ++ .../ionpathextraction/PathExtractorTest.kt | 205 +++++++ src/test/resources/test-cases.ion | 209 +++++++ 21 files changed, 2185 insertions(+) create mode 100644 .gitignore create mode 100644 build.gradle create mode 100644 config/README.md create mode 100644 config/checkstyle/checkstyle.xml create mode 100644 config/checkstyle/copyright-header create mode 100644 config/intellij/codestyle.xml create mode 100644 settings.gradle create mode 100644 src/main/java/software/amazon/com/ionpathextraction/PathComponentParser.java create mode 100644 src/main/java/software/amazon/com/ionpathextraction/PathExtractor.java create mode 100644 src/main/java/software/amazon/com/ionpathextraction/PathExtractorBuilder.java create mode 100644 src/main/java/software/amazon/com/ionpathextraction/PathExtractorConfig.java create mode 100644 src/main/java/software/amazon/com/ionpathextraction/SearchPath.java create mode 100644 
src/main/java/software/amazon/com/ionpathextraction/exceptions/PathExtractionException.java create mode 100644 src/main/java/software/amazon/com/ionpathextraction/pathcomponents/Index.java create mode 100644 src/main/java/software/amazon/com/ionpathextraction/pathcomponents/PathComponent.java create mode 100644 src/main/java/software/amazon/com/ionpathextraction/pathcomponents/Text.java create mode 100644 src/main/java/software/amazon/com/ionpathextraction/pathcomponents/Wildcard.java create mode 100644 src/main/java/software/amazon/com/ionpathextraction/utils/Preconditions.java create mode 100644 src/test/kotlin/software/amazon/com/ionpathextraction/PathExtractorTest.kt create mode 100644 src/test/resources/test-cases.ion diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..30a5ea6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +.gradle/ +.idea/ +build +gradle/ +gradlew +gradlew.bat +out/ +ion-c diff --git a/README.md b/README.md index 90fcba6..f7a7b5e 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,59 @@ Ion Path Extraction API aims to combine the convenience of a DOM API with the speed of a streaming API. +The traditional streaming and DOM APIs force the user to choose between speed and convenience, respectively. +Path extraction APIs aim to combine the two by allowing the user to register paths into the data using just a +few lines of code and receive callbacks during stream processing when any of those paths is matched. This allows +the Ion reader to plan the most efficient traversal over the data without requiring further manual interaction +from the user. For example, there is no reason to step in to containers which could not possibly match one of +the search paths. When encoded in binary Ion, the resulting skip is a seek forward in the input stream, which +is inexpensive relative to the cost of parsing (and in the case of a DOM, materializing) the skipped value. + +## Usage +Path extractor works in two phases: +1. Configuration +2. 
Notification + +### Search Paths +A `SearchPath` is a path provided to the extractor for matching. It's composed of a list of `PathComponent`s +which can be one of: +* Wildcard: matches all values +* Index: match the value at that index +* Text: match all values whose field names are equivalent to that text + +Some examples: +``` +data on reader: {foo: ["foo1", "foo2"] , bar: "myBarValue"} + +(foo 0) - matches "foo1" +(1) - matches "myBarValue" +(*) - matches ["foo1", "foo2"] and "myBarValue" +() - matches {foo: ["foo1", "foo2"] , bar: "myBarValue"} +``` + +### Configuration +The configuration phase involves building a `PathExtractor` instance through the `PathExtractorBuilder` by setting its +configuration options and registering its search paths. The built `PathExtractor` can be reused over many `IonReader`s. + +example: + +```java +PathExtractorBuilder.standard() + .withMatchCaseInsensitive(true) + .register("(foo)", (reader) -> { ... }) + .build() +``` + +see `PathExtractorBuilder` javadoc for more information on configuration options and search path registration. + +### Notification +Each time the `PathExtractor` encounters a value that matches a registered search path it will invoke the respective +callback passing the reader positioned at the current value. See `PathExtractorBuilder#register` methods for more +information on the callback contract. + +## Ion Developer information +See the developer guide on: http://amzn.github.io/ion-docs/guides/path-extractor-guide.html + ## License This library is licensed under the Apache 2.0 License. diff --git a/build.gradle b/build.gradle new file mode 100644 index 0000000..9d0e8f1 --- /dev/null +++ b/build.gradle @@ -0,0 +1,72 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. 
+ * A copy of the License is located at: + * + * http://aws.amazon.com/apache2.0/ + * + * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific + * language governing permissions and limitations under the License. + */ + +buildscript { + ext.ionVersion = "1.2.+" + ext.kotlin_version = "1.2.+" + ext.junitVersion = "5.3.+" + + repositories { + mavenCentral() + } + + dependencies { + classpath "org.jetbrains.kotlin:kotlin-gradle-plugin:$kotlin_version" + } +} + +apply plugin: "java" +apply plugin: "kotlin" + +group 'software.amazon.ion' +version '1.0-SNAPSHOT' + +sourceCompatibility = 1.8 + +repositories { + mavenCentral() +} + +dependencies { + compile "software.amazon.ion:ion-java:$ionVersion" + + // using kotlin to make tests less verbose + testCompile "org.jetbrains.kotlin:kotlin-stdlib-jdk8:$kotlin_version" + testCompile "org.jetbrains.kotlin:kotlin-test-junit:$kotlin_version" + + // JUnit 5 + testCompile "org.junit.jupiter:junit-jupiter-api:$junitVersion" + testCompile "org.junit.jupiter:junit-jupiter-params:$junitVersion" + testRuntime "org.junit.jupiter:junit-jupiter-engine:$junitVersion" +} + +tasks.withType(org.jetbrains.kotlin.gradle.tasks.KotlinCompile).all { + kotlinOptions { + jvmTarget = "1.8" + } +} + +apply plugin: 'checkstyle' +checkstyle { + toolVersion = "8.12" + ignoreFailures = false + maxWarnings = 0 + maxErrors = 0 + configDir = file("$rootProject.projectDir/config/checkstyle") +} +tasks.withType(Checkstyle) { + reports { + xml.enabled = false + html.enabled = true + } +} diff --git a/config/README.md b/config/README.md new file mode 100644 index 0000000..f9b41d0 --- /dev/null +++ b/config/README.md @@ -0,0 +1,2 @@ +# Configuration +Project development configuration files, for example code style and checkstyle settings used on all modules \ No newline at end of file diff --git 
a/config/checkstyle/checkstyle.xml b/config/checkstyle/checkstyle.xml new file mode 100644 index 0000000..6404803 --- /dev/null +++ b/config/checkstyle/checkstyle.xml @@ -0,0 +1,264 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/config/checkstyle/copyright-header b/config/checkstyle/copyright-header new file mode 100644 index 0000000..7a9bbe3 --- /dev/null +++ b/config/checkstyle/copyright-header @@ -0,0 +1,12 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at: + * + * http://aws.amazon.com/apache2.0/ + * + * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific + * language governing permissions and limitations under the License. + */ \ No newline at end of file diff --git a/config/intellij/codestyle.xml b/config/intellij/codestyle.xml new file mode 100644 index 0000000..4d45f85 --- /dev/null +++ b/config/intellij/codestyle.xml @@ -0,0 +1,522 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + xmlns:android + + ^$ + + + +
+
+ + + + xmlns:.* + + ^$ + + + BY_NAME + +
+
+ + + + .*:id + + http://schemas.android.com/apk/res/android + + + +
+
+ + + + style + + ^$ + + + +
+
+ + + + .* + + ^$ + + + BY_NAME + +
+
+ + + + .*:.*Style + + http://schemas.android.com/apk/res/android + + + BY_NAME + +
+
+ + + + .*:layout_width + + http://schemas.android.com/apk/res/android + + + +
+
+ + + + .*:layout_height + + http://schemas.android.com/apk/res/android + + + +
+
+ + + + .*:layout_weight + + http://schemas.android.com/apk/res/android + + + +
+
+ + + + .*:layout_margin + + http://schemas.android.com/apk/res/android + + + +
+
+ + + + .*:layout_marginTop + + http://schemas.android.com/apk/res/android + + + +
+
+ + + + .*:layout_marginBottom + + http://schemas.android.com/apk/res/android + + + +
+
+ + + + .*:layout_marginStart + + http://schemas.android.com/apk/res/android + + + +
+
+ + + + .*:layout_marginEnd + + http://schemas.android.com/apk/res/android + + + +
+
+ + + + .*:layout_marginLeft + + http://schemas.android.com/apk/res/android + + + +
+
+ + + + .*:layout_marginRight + + http://schemas.android.com/apk/res/android + + + +
+
+ + + + .*:layout_.* + + http://schemas.android.com/apk/res/android + + + BY_NAME + +
+
+ + + + .*:padding + + http://schemas.android.com/apk/res/android + + + +
+
+ + + + .*:paddingTop + + http://schemas.android.com/apk/res/android + + + +
+
+ + + + .*:paddingBottom + + http://schemas.android.com/apk/res/android + + + +
+
+ + + + .*:paddingStart + + http://schemas.android.com/apk/res/android + + + +
+
+ + + + .*:paddingEnd + + http://schemas.android.com/apk/res/android + + + +
+
+ + + + .*:paddingLeft + + http://schemas.android.com/apk/res/android + + + +
+
+ + + + .*:paddingRight + + http://schemas.android.com/apk/res/android + + + +
+
+ + + + .* + http://schemas.android.com/apk/res/android + + + BY_NAME + +
+
+ + + + .* + http://schemas.android.com/apk/res-auto + + + BY_NAME + +
+
+ + + + .* + http://schemas.android.com/tools + + + BY_NAME + +
+
+ + + + .* + .* + + + BY_NAME + +
+
+
+
+ + +
\ No newline at end of file diff --git a/settings.gradle b/settings.gradle new file mode 100644 index 0000000..d837e01 --- /dev/null +++ b/settings.gradle @@ -0,0 +1,15 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at: + * + * http://aws.amazon.com/apache2.0/ + * + * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific + * language governing permissions and limitations under the License. + */ + +rootProject.name = 'ion-java-path-extraction' + diff --git a/src/main/java/software/amazon/com/ionpathextraction/PathComponentParser.java b/src/main/java/software/amazon/com/ionpathextraction/PathComponentParser.java new file mode 100644 index 0000000..7579964 --- /dev/null +++ b/src/main/java/software/amazon/com/ionpathextraction/PathComponentParser.java @@ -0,0 +1,107 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at: + * + * http://aws.amazon.com/apache2.0/ + * + * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific + * language governing permissions and limitations under the License. 
+ */
+
+package software.amazon.com.ionpathextraction;
+
+import static software.amazon.com.ionpathextraction.utils.Preconditions.checkArgument;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import software.amazon.com.ionpathextraction.exceptions.PathExtractionException;
+import software.amazon.com.ionpathextraction.pathcomponents.Index;
+import software.amazon.com.ionpathextraction.pathcomponents.PathComponent;
+import software.amazon.com.ionpathextraction.pathcomponents.Text;
+import software.amazon.com.ionpathextraction.pathcomponents.Wildcard;
+import software.amazon.ion.IonReader;
+import software.amazon.ion.IonSystem;
+import software.amazon.ion.IonType;
+import software.amazon.ion.IonWriter;
+import software.amazon.ion.system.IonSystemBuilder;
+
+/**
+ * Parses a search path ion expression into {@link PathComponent}s.
+ *
+ * <p>
+ * The expression must be an s-expression. Each int inside it becomes an {@link Index}; each string or symbol becomes
+ * a {@link Text}, unless its text equals {@link Wildcard#TEXT} and it does not carry the
+ * {@code $ion_extractor_field} annotation, in which case it becomes {@link Wildcard#INSTANCE}.
+ * </p>
+ */
+class PathComponentParser {
+
+    private static final IonSystem ION_SYSTEM = IonSystemBuilder.standard().build();
+
+    // annotation that makes a component spelled like the wildcard be treated as plain text
+    private static final String WILDCARD_ESCAPE_ANNOTATION = "$ion_extractor_field";
+
+    /**
+     * Parses the first top level value of the expression into path components. Any value after the first one is
+     * not read.
+     *
+     * @param ionPathExpression search path as Ion text; it is validated through {@code checkArgument} to be non
+     *     empty and to be an s-expression.
+     * @return the path components in the order they appear in the expression; empty for {@code ()}.
+     * @throws PathExtractionException if a component is a null value or is not an int, string or symbol, or if
+     *     closing the reader fails.
+     */
+    List<PathComponent> parse(final String ionPathExpression) {
+        List<PathComponent> pathComponents;
+
+        try (final IonReader reader = ION_SYSTEM.newReader(ionPathExpression)) {
+            checkArgument(reader.next() != null, "ionPathExpression cannot be empty");
+            checkArgument(reader.getType() == IonType.SEXP, "ionPathExpression must be a s-expression");
+
+            reader.stepIn();
+            pathComponents = readStates(reader);
+        } catch (IOException e) {
+            throw new PathExtractionException(e);
+        }
+
+        return pathComponents;
+    }
+
+    /**
+     * Converts every value at the reader's current depth into a path component.
+     */
+    private List<PathComponent> readStates(final IonReader reader) {
+        final List<PathComponent> pathComponents = new ArrayList<>();
+
+        while (reader.next() != null) {
+            // typed nulls (null.int, null.symbol, null.string) have no index or text to match on; stringValue()
+            // would hand back a Java null for them, so reject them up front instead of failing later with an NPE
+            if (reader.isNullValue()) {
+                throw new PathExtractionException("Invalid path component type: " + readIonText(reader));
+            }
+
+            switch (reader.getType()) {
+                case INT:
+                    pathComponents.add(new Index(reader.intValue()));
+                    break;
+
+                case STRING:
+                case SYMBOL:
+                    if (isWildcard(reader)) {
+                        pathComponents.add(Wildcard.INSTANCE);
+                    } else {
+                        pathComponents.add(new Text(reader.stringValue()));
+                    }
+                    break;
+
+                default:
+                    throw new PathExtractionException("Invalid path component type: " + readIonText(reader));
+            }
+        }
+
+        return pathComponents;
+    }
+
+    /**
+     * Renders the reader's current value as Ion text, used to build error messages.
+     */
+    private String readIonText(final IonReader reader) {
+        StringBuilder out = new StringBuilder();
+        try (IonWriter writer = ION_SYSTEM.newTextWriter(out)) {
+            writer.writeValue(reader);
+        } catch (IOException e) {
+            throw new PathExtractionException(e);
+        }
+        return out.toString();
+    }
+
+    /**
+     * Tells if the current string or symbol is the wildcard: its text equals {@link Wildcard#TEXT} and none of its
+     * annotations is the escape annotation.
+     */
+    private boolean isWildcard(final IonReader reader) {
+        // constant first so that a null text can never raise a NullPointerException here
+        if (!Wildcard.TEXT.equals(reader.stringValue())) {
+            return false;
+        }
+
+        for (final Iterator<String> iter = reader.iterateTypeAnnotations(); iter.hasNext(); ) {
+            if (WILDCARD_ESCAPE_ANNOTATION.equals(iter.next())) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+}
diff --git a/src/main/java/software/amazon/com/ionpathextraction/PathExtractor.java b/src/main/java/software/amazon/com/ionpathextraction/PathExtractor.java
new file mode 100644
index 0000000..2293111
--- /dev/null
+++ b/src/main/java/software/amazon/com/ionpathextraction/PathExtractor.java
@@ -0,0 +1,207 @@
+/*
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License").
+ * You may not use this file except in compliance with the License.
+ * A copy of the License is located at:
+ *
+ *     http://aws.amazon.com/apache2.0/
+ *
+ * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific
+ * language governing permissions and limitations under the License.
+ */ + +package software.amazon.com.ionpathextraction; + +import static software.amazon.com.ionpathextraction.utils.Preconditions.checkArgument; +import static software.amazon.com.ionpathextraction.utils.Preconditions.checkState; + +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Deque; +import java.util.List; +import java.util.function.Function; +import software.amazon.com.ionpathextraction.pathcomponents.PathComponent; +import software.amazon.ion.IonReader; + +/** + *

+ * Path extractor takes registered paths and when it finds one during stream processing invokes the respective callback. + * This allows the Ion reader to plan the most efficient traversal over the data without requiring further manual + * interaction from the user. + *

+ * + *

+ * For example, there is no reason to step in to containers which could not possibly match one of the search paths. When + * encoded in binary Ion, the resulting skip is a seek forward in the input stream, which is inexpensive relative to the + * cost of parsing (and in the case of a DOM, materializing) the skipped value. + *

+ */
+public class PathExtractor {
+
+    private final PathExtractorConfig config;
+
+    // mutable per-run state, reset at the start of every match(IonReader) call; because it is shared by all calls
+    // one instance cannot run two match calls at the same time
+    private final Tracker tracker;
+
+    private final List<SearchPath> searchPaths;
+
+    // callbacks.get(id) is the callback of the search path with that id
+    private final List<Function<IonReader, Integer>> callbacks;
+
+    PathExtractor(final List<SearchPath> searchPaths,
+                  final List<Function<IonReader, Integer>> callbacks,
+                  final PathExtractorConfig config) {
+
+        this.searchPaths = searchPaths;
+        this.callbacks = callbacks;
+        this.config = config;
+
+        // the tracker keeps one entry for depth zero plus one per container entered, and a container is only
+        // entered while some path still has components left, so the longest path bounds the stack size
+        int maxPathLength = searchPaths.stream()
+            .mapToInt(sp -> sp.getPathComponents().size())
+            .max()
+            .orElse(0);
+
+        tracker = new Tracker(maxPathLength + 1);
+    }
+
+    /**
+     * Iterates over the reader looking for registered search paths, when a match is found invokes the respective
+     * callback.
+     *
+     * @param reader {@link IonReader} to process. Unless the extractor was configured to match relative paths the
+     *     reader must be at depth zero, which is validated through {@code checkArgument}.
+     */
+    public void match(final IonReader reader) {
+        checkArgument(reader.getDepth() == 0 || config.isMatchRelativePaths(),
+            "reader must be at depth zero, it was at:" + reader.getDepth());
+
+        // marks all search paths as active
+        tracker.reset(searchPaths);
+        matchRecursive(reader);
+    }
+
+    /**
+     * Matches every remaining value at the reader's current depth against the active search paths, recursing into
+     * containers that can still contain a match.
+     *
+     * @return how many more levels the caller has to leave because a callback asked to step out; zero when the
+     *     values at this depth were exhausted normally.
+     */
+    private int matchRecursive(final IonReader reader) {
+        final int currentDepth = tracker.getCurrentDepth();
+        int ordinal = 0;
+
+        while (reader.next() != null) {
+            // will continue to next depth
+            final List<SearchPath> partialMatches = new ArrayList<>();
+
+            boolean hasTerminalMatch = false;
+            for (SearchPath sp : tracker.activePaths()) {
+                boolean match = pathComponentMatches(sp, reader, ordinal);
+                boolean isTerminal = isTerminal(sp);
+
+                if (match && isTerminal) {
+                    hasTerminalMatch = true;
+                    int stepOutTimes = invokeCallback(reader, sp);
+                    if (stepOutTimes > 0) {
+                        // leaving this depth already accounts for one of the requested step outs
+                        return stepOutTimes - 1;
+                    }
+                }
+
+                // all non terminal paths are partial matches at depth zero
+                if (!isTerminal && (currentDepth == 0 || match)) {
+                    partialMatches.add(sp);
+                }
+            }
+
+            // with no partial match nothing below this value can ever trigger a callback, so the container is
+            // skipped instead of being traversed
+            if (!partialMatches.isEmpty() && needsToStepIn(reader, hasTerminalMatch)) {
+                tracker.push(partialMatches);
+                reader.stepIn();
+                int stepOutTimes = matchRecursive(reader);
+                reader.stepOut();
+                tracker.pop();
+
+                if (stepOutTimes > 0) {
+                    return stepOutTimes - 1;
+                }
+            }
+
+            ordinal += 1;
+        }
+
+        return 0;
+    }
+
+    /**
+     * Invokes the callback registered for the search path and verifies that it left the reader at the depth it
+     * received it.
+     *
+     * @return the step-out-N value returned by the callback.
+     */
+    private int invokeCallback(final IonReader reader, final SearchPath searchPath) {
+        int previousReaderDepth = reader.getDepth();
+        int stepOutTimes = callbacks.get(searchPath.getId()).apply(reader);
+        int newReaderDepth = reader.getDepth();
+
+        checkState(previousReaderDepth == newReaderDepth,
+            "Reader must be at same depth when returning from callbacks. initial: "
+                + previousReaderDepth
+                + ", new: "
+                + newReaderDepth);
+
+        return stepOutTimes;
+    }
+
+    /**
+     * Tells if the current value is a container that should be entered.
+     */
+    private boolean needsToStepIn(final IonReader reader, final boolean hasTerminalMatches) {
+        // NOTE(review): a top level value matched by the empty path is never entered, so a longer path
+        // registered next to () cannot match inside that value - confirm this is intended
+        if (tracker.getCurrentDepth() == 0 && hasTerminalMatches) {
+            return false;
+        }
+
+        switch (reader.getType()) {
+            case LIST:
+            case SEXP:
+            case STRUCT:
+            case DATAGRAM:
+                return true;
+            default:
+                return false;
+        }
+    }
+
+    /**
+     * Tells if the search path's component for the current depth matches the value the reader is positioned at.
+     */
+    private boolean pathComponentMatches(final SearchPath searchPath,
+                                         final IonReader reader,
+                                         final int currentPosition) {
+        // depth 0 can only match the empty search path: ()
+        int depth = tracker.getCurrentDepth();
+        List<PathComponent> pathComponents = searchPath.getPathComponents();
+
+        if (depth == 0) {
+            return pathComponents.isEmpty();
+        } else if (depth <= pathComponents.size()) {
+            return pathComponents.get(depth - 1).matches(reader, currentPosition, config);
+        }
+
+        return false;
+    }
+
+    /**
+     * Tells if the current depth corresponds to the last component of the search path.
+     */
+    private boolean isTerminal(final SearchPath searchPath) {
+        return tracker.getCurrentDepth() == searchPath.getPathComponents().size();
+    }
+
+    /**
+     * Stack of the search paths still active at each depth; the top entry belongs to the current depth.
+     */
+    private static class Tracker {
+
+        private final Deque<List<SearchPath>> stack;
+
+        Tracker(final int size) {
+            stack = new ArrayDeque<>(size);
+        }
+
+        void reset(final List<SearchPath> searchPaths) {
+            stack.clear();
+            stack.push(searchPaths);
+        }
+
+        List<SearchPath> activePaths() {
+            return stack.peek();
+        }
+
+        int getCurrentDepth() {
+            // the entry pushed by reset stands for depth zero
+            return stack.size() - 1;
+        }
+
+        void push(final List<SearchPath> partialMatches) {
+            stack.push(partialMatches);
+        }
+
+        void pop() {
+            stack.pop();
+        }
+    }
+}
diff --git
a/src/main/java/software/amazon/com/ionpathextraction/PathExtractorBuilder.java b/src/main/java/software/amazon/com/ionpathextraction/PathExtractorBuilder.java
new file mode 100644
index 0000000..78f516f
--- /dev/null
+++ b/src/main/java/software/amazon/com/ionpathextraction/PathExtractorBuilder.java
@@ -0,0 +1,160 @@
+/*
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License").
+ * You may not use this file except in compliance with the License.
+ * A copy of the License is located at:
+ *
+ *     http://aws.amazon.com/apache2.0/
+ *
+ * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific
+ * language governing permissions and limitations under the License.
+ */
+
+package software.amazon.com.ionpathextraction;
+
+import static software.amazon.com.ionpathextraction.utils.Preconditions.checkArgument;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.Function;
+import software.amazon.com.ionpathextraction.pathcomponents.PathComponent;
+import software.amazon.ion.IonReader;
+
+/**
+ * {@link PathExtractor} builder.
+ */
+public final class PathExtractorBuilder {
+
+    private static final boolean DEFAULT_MATCH_RELATIVE_PATHS = false;
+    private static final boolean DEFAULT_CASE_INSENSITIVE = false;
+
+    // parallel lists: the callback of the search path with id i is callbacks.get(i)
+    private final List<SearchPath> searchPaths = new ArrayList<>();
+    private final List<Function<IonReader, Integer>> callbacks = new ArrayList<>();
+    private boolean matchRelativePaths;
+    private boolean matchCaseInsensitive;
+    private PathComponentParser compiler;
+
+
+    private PathExtractorBuilder() {
+    }
+
+    /**
+     * Creates a new builder with standard configuration.
+     *
+     * @return new standard builder instance.
+     */
+    public static PathExtractorBuilder standard() {
+        PathExtractorBuilder builder = new PathExtractorBuilder();
+        builder.matchCaseInsensitive = DEFAULT_CASE_INSENSITIVE;
+        builder.matchRelativePaths = DEFAULT_MATCH_RELATIVE_PATHS;
+
+        builder.compiler = new PathComponentParser();
+
+        return builder;
+    }
+
+    /**
+     * Instantiates a {@link PathExtractor} configured by this builder. The extractor receives a snapshot of the
+     * registrations made so far; search paths registered on this builder afterwards do not affect it.
+     *
+     * @return new {@link PathExtractor} instance.
+     */
+    public PathExtractor build() {
+        // copies keep the builder's own lists from being shared with the extractors it produces
+        return new PathExtractor(
+            new ArrayList<>(searchPaths),
+            new ArrayList<>(callbacks),
+            new PathExtractorConfig(matchRelativePaths, matchCaseInsensitive)
+        );
+    }
+
+    /**
+     * Sets matchRelativePaths config. When true the path extractor will accept readers at any depth, when false the
+     * reader must be at depth zero.
+     *
+     * <p>
+     * defaults to false.
+     *
+     * @param matchRelativePaths new config value.
+     * @return builder for chaining.
+     */
+    public PathExtractorBuilder withMatchRelativePaths(final boolean matchRelativePaths) {
+        this.matchRelativePaths = matchRelativePaths;
+
+        return this;
+    }
+
+    /**
+     * Sets matchCaseInsensitive config. When true the path extractor will match fields ignoring case, when false the
+     * path extractor will match respecting the path components case.
+     *
+     * <p>
+     * defaults to false.
+     *
+     * @param matchCaseInsensitive new config value.
+     * @return builder for chaining.
+     */
+    public PathExtractorBuilder withMatchCaseInsensitive(final boolean matchCaseInsensitive) {
+        this.matchCaseInsensitive = matchCaseInsensitive;
+
+        return this;
+    }
+
+    /**
+     * Register a callback for a search path.
+     *
+     * @param searchExpressionAsIon string representation of a search path.
+     * @param callback callback to be registered.
+     * @return builder for chaining.
+     * @see PathExtractorBuilder#register(List, Function)
+     */
+    public PathExtractorBuilder register(final String searchExpressionAsIon,
+                                         final Function<IonReader, Integer> callback) {
+        checkArgument(searchExpressionAsIon != null, "searchExpressionAsIon cannot be null");
+
+        List<PathComponent> pathComponents = compiler.parse(searchExpressionAsIon);
+        register(pathComponents, callback);
+
+        return this;
+    }
+
+    /**
+     * Register a callback for a search path.
+     * <p>
+     * The callback receives the matcher's {@link IonReader}, positioned on the matching value, so that it can use the
+     * appropriate reader method to access the value. The callback return value is a 'step-out-N' instruction.
+     * The most common value is zero, which tells the extractor to continue with the next value at the same depth.
+     * A return value greater than zero may be useful to users who only care about the first match at a particular
+     * depth.
+     * </p>
+     * <p>
+     * Callback implementations MUST comply with the following:
+     * </p>
+     * <ul>
+     * <li>
+     * The reader must not be advanced past the matching value. Violating this will cause the following value to be
+     * skipped. If a value is skipped, neither the value itself nor any of its children will be checked for match
+     * against any of the extractor's registered paths.
+     * </li>
+     * <li>
+     * If the reader is positioned on a container value, its cursor must be at the same depth when the callback returns.
+     * In other words, if the user steps in to the matched value, it must step out an equal number of times. Violating
+     * this will raise an error.
+     * </li>
+     * </ul>
+     *
+     * @param pathComponents search path as a list of path components.
+     * @param callback callback to be registered.
+     * @return builder for chaining.
+     * @see PathExtractorBuilder#register(String, Function)
+     */
+    public PathExtractorBuilder register(final List<PathComponent> pathComponents,
+                                         final Function<IonReader, Integer> callback) {
+        checkArgument(pathComponents != null, "pathComponents cannot be null");
+        checkArgument(callback != null, "callback cannot be null");
+
+        // the search path id is its position in the list, which is also the position of its callback; the
+        // components are copied so that later changes to the caller's list cannot alter the registered path
+        searchPaths.add(new SearchPath(searchPaths.size(), new ArrayList<>(pathComponents)));
+        callbacks.add(callback);
+
+        return this;
+    }
+}
diff --git a/src/main/java/software/amazon/com/ionpathextraction/PathExtractorConfig.java b/src/main/java/software/amazon/com/ionpathextraction/PathExtractorConfig.java
new file mode 100644
index 0000000..6937302
--- /dev/null
+++ b/src/main/java/software/amazon/com/ionpathextraction/PathExtractorConfig.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License").
+ * You may not use this file except in compliance with the License.
+ * A copy of the License is located at:
+ *
+ *     http://aws.amazon.com/apache2.0/
+ *
+ * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific
+ * language governing permissions and limitations under the License.
+ */
+
+package software.amazon.com.ionpathextraction;
+
+/**
+ * Immutable holder of the two options a {@link PathExtractor} is configured with.
+ */
+public class PathExtractorConfig {
+
+    private final boolean matchRelativePaths;
+    private final boolean matchCaseInsensitive;
+
+    /**
+     * Constructor.
+     *
+     * @param matchRelativePaths value returned by {@link #isMatchRelativePaths()}.
+     * @param matchCaseInsensitive value returned by {@link #isMatchCaseInsensitive()}.
+     */
+    PathExtractorConfig(final boolean matchRelativePaths, final boolean matchCaseInsensitive) {
+        this.matchRelativePaths = matchRelativePaths;
+        this.matchCaseInsensitive = matchCaseInsensitive;
+    }
+
+    /**
+     * Returns the matchRelativePaths option.
+     *
+     * @return the value given at construction.
+     */
+    boolean isMatchRelativePaths() {
+        return matchRelativePaths;
+    }
+
+    /**
+     * Returns the matchCaseInsensitive option.
+     *
+     * @return the value given at construction.
+     */
+    public boolean isMatchCaseInsensitive() {
+        return matchCaseInsensitive;
+    }
+}
diff --git a/src/main/java/software/amazon/com/ionpathextraction/SearchPath.java b/src/main/java/software/amazon/com/ionpathextraction/SearchPath.java
new file mode 100644
index 0000000..29bdbda
--- /dev/null
+++ b/src/main/java/software/amazon/com/ionpathextraction/SearchPath.java
@@ -0,0 +1,45 @@
+/*
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License").
+ * You may not use this file except in compliance with the License.
+ * A copy of the License is located at:
+ *
+ *     http://aws.amazon.com/apache2.0/
+ *
+ * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific
+ * language governing permissions and limitations under the License.
+ */
+
+package software.amazon.com.ionpathextraction;
+
+import static software.amazon.com.ionpathextraction.utils.Preconditions.checkArgument;
+
+import java.util.List;
+import java.util.function.Function;
+import software.amazon.com.ionpathextraction.pathcomponents.PathComponent;
+import software.amazon.ion.IonReader;
+
+// FIXME this is a weird class, probably better to remove it
+
+/**
+ * A path which is provided to the extractor for matching.
+ */
+class SearchPath {
+
+    private final int id;
+    private final List<PathComponent> pathComponents;
+
+    /**
+     * Constructor.
+     *
+     * @param id identifier of this search path.
+     * @param pathComponents components of the path, first component first. The list is stored as given, not copied.
+     */
+    SearchPath(final int id, final List<PathComponent> pathComponents) {
+        this.id = id;
+        this.pathComponents = pathComponents;
+    }
+
+    /**
+     * Returns the identifier given at construction.
+     *
+     * @return search path id.
+     */
+    int getId() {
+        return id;
+    }
+
+    /**
+     * Returns the list of components given at construction.
+     *
+     * @return path components, the same list instance that was passed to the constructor.
+     */
+    List<PathComponent> getPathComponents() {
+        return pathComponents;
+    }
+}
diff --git a/src/main/java/software/amazon/com/ionpathextraction/exceptions/PathExtractionException.java b/src/main/java/software/amazon/com/ionpathextraction/exceptions/PathExtractionException.java
new file mode 100644
index 0000000..13c3538
--- /dev/null
+++ b/src/main/java/software/amazon/com/ionpathextraction/exceptions/PathExtractionException.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License").
+ * You may not use this file except in compliance with the License.
+ * A copy of the License is located at:
+ *
+ *     http://aws.amazon.com/apache2.0/
+ *
+ * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific
+ * language governing permissions and limitations under the License.
+ */
+
+package software.amazon.com.ionpathextraction.exceptions;
+
+/**
+ * Base exception.
+ */
+public class PathExtractionException extends RuntimeException {
+
+    /**
+     * Creates an exception with a message and no cause.
+     *
+     * @param message detail message.
+     */
+    public PathExtractionException(final String message) {
+        super(message);
+    }
+
+    /**
+     * Creates an exception with a message and the cause that triggered it.
+     *
+     * @param message detail message.
+     * @param cause underlying cause.
+     */
+    public PathExtractionException(final String message, final Throwable cause) {
+        super(message, cause);
+    }
+
+    /**
+     * Creates an exception that wraps another throwable.
+     *
+     * @param cause underlying cause.
+     */
+    public PathExtractionException(final Throwable cause) {
+        super(cause);
+    }
+}
diff --git a/src/main/java/software/amazon/com/ionpathextraction/pathcomponents/Index.java b/src/main/java/software/amazon/com/ionpathextraction/pathcomponents/Index.java
new file mode 100644
index 0000000..c74aad4
--- /dev/null
+++ b/src/main/java/software/amazon/com/ionpathextraction/pathcomponents/Index.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License").
+ * You may not use this file except in compliance with the License.
+ * A copy of the License is located at:
+ *
+ *     http://aws.amazon.com/apache2.0/
+ *
+ * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific
+ * language governing permissions and limitations under the License.
+ */
+
+package software.amazon.com.ionpathextraction.pathcomponents;
+
+import software.amazon.com.ionpathextraction.PathExtractorConfig;
+import software.amazon.ion.IonReader;
+
+/**
+ * Index path component matches collection by position, example.
+ *
+ * data: {foo: [1,2,3], bar: { baz: [1] }}
+ *
+ * search path | callback invoked with reader at
+ * ------------|--------------------
+ *  (0)        | [1, 2, 3]
+ *  (0 2)      | 3
+ * 
+ */ +public class Index implements PathComponent { + + private final int ordinal; + + /** + * Constructor. + * + * @param ordinal component ordinal. + */ + public Index(final int ordinal) { + this.ordinal = ordinal; + } + + @Override + public boolean matches(final IonReader reader, final int currentPosition, final PathExtractorConfig config) { + return ordinal == currentPosition; + } +} diff --git a/src/main/java/software/amazon/com/ionpathextraction/pathcomponents/PathComponent.java b/src/main/java/software/amazon/com/ionpathextraction/pathcomponents/PathComponent.java new file mode 100644 index 0000000..8318475 --- /dev/null +++ b/src/main/java/software/amazon/com/ionpathextraction/pathcomponents/PathComponent.java @@ -0,0 +1,40 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at: + * + * http://aws.amazon.com/apache2.0/ + * + * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific + * language governing permissions and limitations under the License. + */ + +package software.amazon.com.ionpathextraction.pathcomponents; + +import software.amazon.com.ionpathextraction.PathExtractor; +import software.amazon.com.ionpathextraction.PathExtractorConfig; +import software.amazon.ion.IonReader; + +/** + * A search path component, for example the path (foo * 1) has three components. + * + *
    + *
  1. foo
  2. + *
  3. *
  4. + *
  5. 1
  6. + *
+ */ +public interface PathComponent { + + /** + * Checks if this component matches the current reader position with the given configuration. + * + * @param reader {@link IonReader}. + * @param currentPosition reader value position at the current depth. + * @param config {@link PathExtractor} configuration. + * @return true if the component matches the current reader position, false otherwise. + */ + boolean matches(final IonReader reader, final int currentPosition, final PathExtractorConfig config); +} diff --git a/src/main/java/software/amazon/com/ionpathextraction/pathcomponents/Text.java b/src/main/java/software/amazon/com/ionpathextraction/pathcomponents/Text.java new file mode 100644 index 0000000..84c61f4 --- /dev/null +++ b/src/main/java/software/amazon/com/ionpathextraction/pathcomponents/Text.java @@ -0,0 +1,57 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at: + * + * http://aws.amazon.com/apache2.0/ + * + * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific + * language governing permissions and limitations under the License. + */ + +package software.amazon.com.ionpathextraction.pathcomponents; + +import static software.amazon.com.ionpathextraction.utils.Preconditions.checkArgument; + +import software.amazon.com.ionpathextraction.PathExtractorConfig; +import software.amazon.ion.IonReader; + +/** + * Text path component matches struct field names, example. + *
+ * data: {foo: [1,2,3], bar: { baz: [1] }}
+ *
+ * search path | callback invoked with reader at
+ * ------------|--------------------
+ *  (foo)      | [1, 2, 3]
+ *  (bar baz)  | [1]
+ * 
+ */ +public class Text implements PathComponent { + + private final String fieldName; + + /** + * Constructor. + * + * @param fieldName component field name. + */ + public Text(final String fieldName) { + checkArgument(fieldName != null, "fieldName cannot be null"); + + this.fieldName = fieldName; + } + + @Override + public boolean matches(final IonReader reader, final int currentPosition, final PathExtractorConfig config) { + if (!reader.isInStruct()) { + return false; + } + + return config.isMatchCaseInsensitive() + ? fieldName.equalsIgnoreCase(reader.getFieldName()) + : fieldName.equals(reader.getFieldName()); + } +} diff --git a/src/main/java/software/amazon/com/ionpathextraction/pathcomponents/Wildcard.java b/src/main/java/software/amazon/com/ionpathextraction/pathcomponents/Wildcard.java new file mode 100644 index 0000000..6c42d47 --- /dev/null +++ b/src/main/java/software/amazon/com/ionpathextraction/pathcomponents/Wildcard.java @@ -0,0 +1,47 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at: + * + * http://aws.amazon.com/apache2.0/ + * + * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific + * language governing permissions and limitations under the License. + */ + +package software.amazon.com.ionpathextraction.pathcomponents; + +import software.amazon.com.ionpathextraction.PathExtractorConfig; +import software.amazon.ion.IonReader; + +/** + * Wildcard path component matches any value, example. + *
+ * data: {foo: [1,2,3], bar: { baz: [1] }}
+ *
+ * search path | callback invoked with reader at
+ * ------------|--------------------
+ *  (*)        | [1, 2, 3] and { baz: [1] }
+ *  (* *)      | 1, 2, 3 and [1]
+ * 
+ */ +public class Wildcard implements PathComponent { + + public static final String TEXT = "*"; + + /** + * Singleton {@link Wildcard} instance. + */ + public static final Wildcard INSTANCE = new Wildcard(); + + /** use INSTANCE. */ + private Wildcard() { + } + + @Override + public boolean matches(final IonReader reader, final int currentPosition, final PathExtractorConfig config) { + return true; + } +} diff --git a/src/main/java/software/amazon/com/ionpathextraction/utils/Preconditions.java b/src/main/java/software/amazon/com/ionpathextraction/utils/Preconditions.java new file mode 100644 index 0000000..1226e80 --- /dev/null +++ b/src/main/java/software/amazon/com/ionpathextraction/utils/Preconditions.java @@ -0,0 +1,48 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at: + * + * http://aws.amazon.com/apache2.0/ + * + * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific + * language governing permissions and limitations under the License. + */ + +package software.amazon.com.ionpathextraction.utils; + +import software.amazon.com.ionpathextraction.exceptions.PathExtractionException; + +/** + * Precondition check helper. + */ +public class Preconditions { + + /** + * Validates argument, fails if condition is not met. + * + * @param isValid if condition is met. + * @param message error message. + * @throws PathExtractionException if not valid. + */ + public static void checkArgument(final Boolean isValid, final String message) { + if (!isValid) { + throw new PathExtractionException(message); + } + } + + /** + * Validates a state, fails if condition is not met. + * + * @param isValid if condition is met. 
+ * @param message error message. + * @throws PathExtractionException if not valid. + */ + public static void checkState(final Boolean isValid, final String message) { + if (!isValid) { + throw new PathExtractionException(message); + } + } +} diff --git a/src/test/kotlin/software/amazon/com/ionpathextraction/PathExtractorTest.kt b/src/test/kotlin/software/amazon/com/ionpathextraction/PathExtractorTest.kt new file mode 100644 index 0000000..947efa6 --- /dev/null +++ b/src/test/kotlin/software/amazon/com/ionpathextraction/PathExtractorTest.kt @@ -0,0 +1,205 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at: + * + * http://aws.amazon.com/apache2.0/ + * + * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific + * language governing permissions and limitations under the License. 
+ */ + +package software.amazon.com.ionpathextraction + +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.assertAll +import org.junit.jupiter.api.assertThrows +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.MethodSource +import software.amazon.com.ionpathextraction.exceptions.PathExtractionException +import software.amazon.com.ionpathextraction.pathcomponents.PathComponent +import software.amazon.ion.* +import software.amazon.ion.system.IonSystemBuilder +import java.io.File +import java.util.stream.Stream +import kotlin.test.assertTrue + +class PathExtractorTest { + companion object { + private val ION = IonSystemBuilder.standard().build() + + data class TestCase(val searchPaths: List, + val data: String, + val expected: IonList, + val stepOutNumber: Int) { + override fun toString(): String = "SearchPaths=$searchPaths, " + + "Data=$data, " + + "Expected=$expected, " + + "StepOutN=$stepOutNumber" + } + + private fun IonValue.toText(): String { + val out = StringBuilder() + ION.newTextWriter(out).use { this.writeTo(it) } + return out.toString() + } + + @JvmStatic + fun testCases(): Stream = + ION.loader.load(File("src/test/resources/test-cases.ion")) + .map { it as IonStruct } + .map { struct -> + val searchPathIonValue = struct["searchPath"] + val searchPaths = when (searchPathIonValue) { + is IonList -> searchPathIonValue.map { it.toText() } + else -> listOf(searchPathIonValue.toText()) + } + + TestCase( + searchPaths, + struct["data"].toText(), + struct["expected"] as IonList, + struct["stepOutN"]?.let { (it as IonInt).intValue() } ?: 0 + ) + }.stream() + } + + private val emptyCallback: (IonReader) -> Int = { 0 } + + private fun collectToIonList(out: IonList, stepOutN: Int): (IonReader) -> Int = { reader -> + ION.newWriter(out).use { it.writeValue(reader) } + stepOutN + } + + @ParameterizedTest + @MethodSource("testCases") + fun 
testSearchPaths(testCase: TestCase) { + val out = ION.newEmptyList() + + val builder = PathExtractorBuilder.standard() + testCase.searchPaths.forEach { builder.register(it, collectToIonList(out, testCase.stepOutNumber)) } + val extractor = builder.build() + + extractor.match(ION.newReader(testCase.data)) + + assertEquals(testCase.expected, out) + } + + @Test + fun testCorrectCallbackCalled() { + var timesCallback1Called = 0 + var timesCallback2Called = 0 + + val extractor = PathExtractorBuilder.standard() + .register("(foo)") { + timesCallback1Called++ + 0 + } + .register("(bar)") { + timesCallback2Called++ + 0 + } + .build() + + extractor.match(ION.newReader("{ bar: 1, bar: 2, foo: 3 }")) + + assertAll( + { assertEquals(1, timesCallback1Called) }, + { assertEquals(2, timesCallback2Called) } + ) + } + + @Test + fun readerAtInvalidDepth() { + val extractor = PathExtractorBuilder.standard() + .register("(foo)") { 0 } + .build() + + val reader = ION.newReader("[{foo: 1}]") + assertTrue(reader.next() != null) + reader.stepIn() + + val exception = assertThrows { extractor.match(reader) } + assertEquals("reader must be at depth zero, it was at:1", exception.message) + } + + @Test + fun matchRelative() { + val out = ION.newEmptyList() + val extractor = PathExtractorBuilder.standard() + .withMatchRelativePaths(true) + .register("(foo)", collectToIonList(out, 0)) + .build() + + val reader = ION.newReader("[{foo: 1}]") + assertTrue(reader.next() != null) + reader.stepIn() + + extractor.match(reader) + + assertEquals(ION.singleValue("[1]"), out) + } + + @Test + fun caseInsensitive() { + val out = ION.newEmptyList() + val extractor = PathExtractorBuilder.standard() + .withMatchCaseInsensitive(true) + .register("(foo)", collectToIonList(out, 0)) + .build() + + extractor.match(ION.newReader("{FOO: 1}{foo: 2}{fOo: 3}{bar: 4}")) + + assertEquals(ION.singleValue("[1,2,3]"), out) + } + + // Invalid configuration 
----------------------------------------------------------------------------- + + @Test + fun nullStringPath() { + val exception = assertThrows { + PathExtractorBuilder.standard().register(null as String?, emptyCallback) + } + + assertEquals("searchExpressionAsIon cannot be null", exception.message) + } + + @Test + fun nullListPath() { + val exception = assertThrows { + PathExtractorBuilder.standard().register(null as List?, emptyCallback) + } + + assertEquals("pathComponents cannot be null", exception.message) + } + + @Test + fun nullCallback() { + val exception = assertThrows { + PathExtractorBuilder.standard().register("(foo)", null) + } + + assertEquals("callback cannot be null", exception.message) + } + + @Test + fun emptySearchPath() { + val exception = assertThrows { + PathExtractorBuilder.standard().register("", emptyCallback) + } + + assertEquals("ionPathExpression cannot be empty", exception.message) + } + + @Test + fun searchPathNotSexp() { + val exception = assertThrows { + PathExtractorBuilder.standard().register("1", emptyCallback) + } + + assertEquals("ionPathExpression must be a s-expression", exception.message) + } +} \ No newline at end of file diff --git a/src/test/resources/test-cases.ion b/src/test/resources/test-cases.ion new file mode 100644 index 0000000..7223c1f --- /dev/null +++ b/src/test/resources/test-cases.ion @@ -0,0 +1,209 @@ +// Field only ---------------------------------------------------------------------------- + +// matches +{ searchPath: (foo), data: {foo: 1}, expected: [1] } +{ searchPath: (foo bar), data: {foo: {bar : 2}}, expected: [2] } + +// escaped wildcard +{ searchPath: ('$ion_extractor_field'::*), data: {'*': 1, foo: 2}, expected: [1]} + +// matches one sibling +{ searchPath: (foo baz), data: {foo: {bar : 2, baz: 3}}, expected: [3] } + +// multiple matches +{ searchPath: (foo bar), data: {foo: {bar : 2, bar: 3}}, expected: [2, 3] } + +// no match +{ searchPath: (foo), data: {baz: 10}, expected: [] } +{ searchPath: 
(foo baz), data: {foo: {bar : 2}}, expected: [] } + +// stepOut +{ searchPath: (foo bar), data: {foo: {bar : 2, bar: 3}}, expected: [2], stepOutN: 1 } +{ + searchPath: (foo bar baz), + data: { foo: { bar: {baz: 1}, bar: {baz: 2} } }, + expected: [1], + stepOutN: 2 +} + +// empty containers +{ searchPath: (foo), data: {}, expected: [] } +{ searchPath: (foo), data: (), expected: [] } +{ searchPath: (foo), data: [], expected: [] } + +// not containers +{ searchPath: (foo), data: null, expected: [] } +{ searchPath: (foo), data: true, expected: [] } +{ searchPath: (foo), data: 1, expected: [] } +{ searchPath: (foo), data: 1e0, expected: [] } +{ searchPath: (foo), data: 1.0, expected: [] } +{ searchPath: (foo), data: 2018T, expected: [] } +{ searchPath: (foo), data: "", expected: [] } +{ searchPath: (foo), data: '', expected: [] } +{ searchPath: (foo), data: {{ }}, expected: [] } +{ searchPath: (foo), data: {{ "" }}, expected: [] } + + + +// Ordinal only -------------------------------------------------------------------------- + +// matches +{ searchPath: (0), data: [1], expected: [1] } +{ searchPath: (0), data: (1), expected: [1] } +{ searchPath: (0), data: {f: 1}, expected: [1] } +{ searchPath: (1), data: [1, 2], expected: [2] } +{ searchPath: (1), data: (1 3), expected: [3] } +{ searchPath: (1), data: {f1: 1, f2: 2}, expected: [2] } +{ searchPath: (0), data: [1, 2], expected: [1] } +{ searchPath: (0), data: (1 3), expected: [1] } +{ searchPath: (0), data: {f1: 1, f2: 2}, expected: [1] } + +// out of bounds +{ searchPath: (1), data: [1], expected: [] } +{ searchPath: (1), data: (1), expected: [] } +{ searchPath: (1), data: {foo: 1}, expected: [] } + +// empty containers +{ searchPath: (0), data: [], expected: [] } +{ searchPath: (0), data: (), expected: [] } +{ searchPath: (0), data: {}, expected: [] } + +// not containers +{ searchPath: (0), data: null, expected: [] } +{ searchPath: (0), data: true, expected: [] } +{ searchPath: (0), data: 1, expected: [] } +{ 
searchPath: (0), data: 1e0, expected: [] } +{ searchPath: (0), data: 1.0, expected: [] } +{ searchPath: (0), data: 2018T, expected: [] } +{ searchPath: (0), data: "", expected: [] } +{ searchPath: (0), data: '', expected: [] } +{ searchPath: (0), data: {{ }}, expected: [] } +{ searchPath: (0), data: {{ "" }}, expected: [] } + + +// Wildcard only ------------------------------------------------------------------------- + +// matches +{ searchPath: (*), data: [1], expected: [1] } +{ searchPath: (*), data: (1), expected: [1] } +{ searchPath: (*), data: {f: 1}, expected: [1] } +{ searchPath: (*), data: [1, 2], expected: [1, 2] } +{ searchPath: (*), data: (1 3), expected: [1, 3] } +{ searchPath: (*), data: {f1: 1, f2: 2}, expected: [1, 2] } +{ searchPath: (* *), data: [1, [2]], expected: [2] } +{ searchPath: (* *), data: (1 (3)), expected: [3] } +{ searchPath: (* *), data: {f1: 1, f2: {f3: 2}}, expected: [2] } + +// insufficient depth +{ searchPath: (* *), data: [1], expected: [] } +{ searchPath: (* *), data: (1), expected: [] } +{ searchPath: (* *), data: {f1: 1}, expected: [] } +{ searchPath: (* *), data: [1, 2], expected: [] } +{ searchPath: (* *), data: (1 2), expected: [] } +{ searchPath: (* *), data: {f1: 1, f2: 2}, expected: [] } + +// step out +{ searchPath: (* *), data: [[1], [2]], expected: [1], stepOutN: 2 } + +// empty containers +{ searchPath: (*), data: [], expected: [] } +{ searchPath: (*), data: (), expected: [] } +{ searchPath: (*), data: {}, expected: [] } + +// not containers +{ searchPath: (*), data: null, expected: [] } +{ searchPath: (*), data: true, expected: [] } +{ searchPath: (*), data: 1, expected: [] } +{ searchPath: (*), data: 1e0, expected: [] } +{ searchPath: (*), data: 1.0, expected: [] } +{ searchPath: (*), data: 2018T, expected: [] } +{ searchPath: (*), data: "", expected: [] } +{ searchPath: (*), data: '', expected: [] } +{ searchPath: (*), data: {{ }}, expected: [] } +{ searchPath: (*), data: {{ "" }}, expected: [] } + + +// Empty 
search path --------------------------------------------------------------------- + +// containers +{ searchPath: (), data: [1], expected: [[1]] } +{ searchPath: (), data: (1), expected: [(1)] } +{ searchPath: (), data: {foo: 1}, expected: [{foo: 1}] } + +// empty containers +{ searchPath: (), data: [], expected: [[]] } +{ searchPath: (), data: (), expected: [()] } +{ searchPath: (), data: {}, expected: [{}] } + +// not containers +{ searchPath: (), data: null, expected: [null] } +{ searchPath: (), data: true, expected: [true] } +{ searchPath: (), data: 1, expected: [1] } +{ searchPath: (), data: 1e0, expected: [1e0] } +{ searchPath: (), data: 1.0, expected: [1.0] } +{ searchPath: (), data: 2018T, expected: [2018T] } +{ searchPath: (), data: "", expected: [""] } +{ searchPath: (), data: '', expected: [''] } +{ searchPath: (), data: {{ }}, expected: [{{ }}] } +{ searchPath: (), data: {{ "" }}, expected: [{{ "" }}] } + + +// Mixed path components ----------------------------------------------------------------- +{ + searchPath: (foo 1), + data: { foo: [0, 1], foo: (0 2), foo: {a: 1, b: 3}, foo: 1, bar: [0, 1] }, + expected: [1, 2, 3] +} +{ + searchPath: (foo *), + data: { foo: [1], foo: (2), foo: {bar: 3}, foo: 1, bar: (9) }, + expected: [1, 2, 3] +} +{ + searchPath: (foo * bar), + data: { foo: [ {bar: 1} ], foo: { baz: {bar: 2} } }, + expected: [1, 2] +} +{ + searchPath: (foo * 0), + data: { foo: [1, [2]], foo: {bar: (3)} }, + expected: [2, 3] +} +{ + searchPath: (foo bar 2), + data: {abc: def, foo: {bar:[1, 2, 3]}}, + expected: [3] +} +{ + searchPath: (foo bar *), + data: {abc: def, foo: {bar:[1, 2, 3]}}, + expected: [1, 2, 3] +} +{ + searchPath: (foo bar * baz), + data: {abc: def, foo: {bar:[{baz:1}, {zar:2}, {baz:3}]}}, + expected: [1, 3] +} + +// stepOut +{ + searchPath: (foo * 0), + data: { + foo: { first: [1], second: [2] }, + foo: { first: [10], second: [20] } + }, + expected: [1,10], + stepOutN: 2 +} + +// Multiple search paths 
----------------------------------------------------------------- +// all match +{ searchPath: [(0), (foo)], data: {bar: 1, foo: 2}, expected: [1, 2] } + +// none match +{ searchPath: [(1), (foo)], data: [0], expected: [] } + +// multiple matchers match the same value +{ searchPath: [(1), (*)], data: [1, 2, 3], expected: [1, 2, 2, 3] } + +{ searchPath: [(foo 1), (foo 2)], data: {foo: [0, 1, 2]}, expected: [1, 2] } \ No newline at end of file From 304a356e24853f59f6f598c077c4f833f4ffbac0 Mon Sep 17 00:00:00 2001 From: barbosf Date: Wed, 17 Oct 2018 16:54:57 -0700 Subject: [PATCH 02/13] Adding a user context example Showing how you can use the lambda closure as ion-c user context --- README.md | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/README.md b/README.md index f7a7b5e..fa67fb5 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,37 @@ Each time the `PathExtractor` encounters a value that matches a registered searc callback passing the reader positioned at the current value. See `PathExtractorBuilder#register` methods for more information on the callback contract. 
+### Examples: + +```java +// Capture all matched values into a List +final IonSystem ion = IonSystemBuilder.standard().build(); + +final List<IonValue> list = new ArrayList<>(); +final Function<IonReader, Integer> callback = (reader) -> { + IonValue ionValue = ion.newValue(reader); + list.add(ionValue); + + return 0; +}; + +final PathExtractor pathExtractor = PathExtractorBuilder.standard() + .register("(foo)", callback) + .register("(bar)", callback) + .register("(baz 1)", callback) + .build(); + +IonReader ionReader = ion.newReader("{foo: 1}" + + "{bar: 2}" + + "{baz: [10,20,30,40]}" + + "{other: 99}" +); + +pathExtractor.match(ionReader); + +// list will contain 1, 2 and 20 +``` + ## Ion Developer information See the developer guide on: http://amzn.github.io/ion-docs/guides/path-extractor-guide.html From 7a2231520cab78aa48d43db68c27581ea3dbf36c Mon Sep 17 00:00:00 2001 From: barbosf Date: Wed, 17 Oct 2018 16:55:48 -0700 Subject: [PATCH 03/13] Wildcard escape should be first annotation making the wildcard escape annotation valid only as the first annotation.
Added a test to show that --- .../PathComponentParser.java | 30 ++++++++++--------- src/test/resources/test-cases.ion | 3 ++ 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/src/main/java/software/amazon/com/ionpathextraction/PathComponentParser.java b/src/main/java/software/amazon/com/ionpathextraction/PathComponentParser.java index 7579964..9d5abdc 100644 --- a/src/main/java/software/amazon/com/ionpathextraction/PathComponentParser.java +++ b/src/main/java/software/amazon/com/ionpathextraction/PathComponentParser.java @@ -17,7 +17,6 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Iterator; import java.util.List; import software.amazon.com.ionpathextraction.exceptions.PathExtractionException; import software.amazon.com.ionpathextraction.pathcomponents.Index; @@ -25,24 +24,25 @@ import software.amazon.com.ionpathextraction.pathcomponents.Text; import software.amazon.com.ionpathextraction.pathcomponents.Wildcard; import software.amazon.ion.IonReader; -import software.amazon.ion.IonSystem; import software.amazon.ion.IonType; import software.amazon.ion.IonWriter; -import software.amazon.ion.system.IonSystemBuilder; +import software.amazon.ion.system.IonReaderBuilder; +import software.amazon.ion.system.IonTextWriterBuilder; /** * Parses a search path ion expression into {@link PathComponent}s. 
*/ class PathComponentParser { - private static final IonSystem ION_SYSTEM = IonSystemBuilder.standard().build(); + private static final IonReaderBuilder READER_BUILDER = IonReaderBuilder.standard(); + private static final IonTextWriterBuilder WRITER_BUILDER = IonTextWriterBuilder.standard(); private static final String WILDCARD_ESCAPE_ANNOTATION = "$ion_extractor_field"; List parse(final String ionPathExpression) { List pathComponents; - try (final IonReader reader = ION_SYSTEM.newReader(ionPathExpression)) { + try (final IonReader reader = newIonReader(ionPathExpression)) { checkArgument(reader.next() != null, "ionPathExpression cannot be empty"); checkArgument(reader.getType() == IonType.SEXP, "ionPathExpression must be a s-expression"); @@ -83,7 +83,7 @@ private List readStates(final IonReader reader) { private String readIonText(final IonReader reader) { StringBuilder out = new StringBuilder(); - try (IonWriter writer = ION_SYSTEM.newTextWriter(out)) { + try (IonWriter writer = newIonTextWriter(out)) { writer.writeValue(reader); } catch (IOException e) { throw new PathExtractionException(e); @@ -93,15 +93,17 @@ private String readIonText(final IonReader reader) { private boolean isWildcard(final IonReader reader) { if (reader.stringValue().equals(Wildcard.TEXT)) { - for (final Iterator iter = reader.iterateTypeAnnotations(); iter.hasNext(); ) { - final String annotation = iter.next(); - if (WILDCARD_ESCAPE_ANNOTATION.equals(annotation)) { - return false; - } - } - - return true; + final String[] annotations = reader.getTypeAnnotations(); + return annotations.length == 0 || !WILDCARD_ESCAPE_ANNOTATION.equals(annotations[0]); } return false; } + + private static IonReader newIonReader(final String ionText) { + return READER_BUILDER.build(ionText); + } + + private static IonWriter newIonTextWriter(final StringBuilder out) { + return WRITER_BUILDER.build(out); + } } diff --git a/src/test/resources/test-cases.ion b/src/test/resources/test-cases.ion index 
7223c1f..75e2263 100644 --- a/src/test/resources/test-cases.ion +++ b/src/test/resources/test-cases.ion @@ -94,6 +94,9 @@ { searchPath: (* *), data: (1 (3)), expected: [3] } { searchPath: (* *), data: {f1: 1, f2: {f3: 2}}, expected: [2] } +// escape annotation is only valid as the first annotation +{ searchPath: (foo::'$ion_extractor_field'::*), data: [1, 2], expected: [1, 2]} + // insufficient depth { searchPath: (* *), data: [1], expected: [] } { searchPath: (* *), data: (1), expected: [] } From aaaa1692d61ce63cb6450b7cdb5d4bb4fcc2f01f Mon Sep 17 00:00:00 2001 From: barbosf Date: Wed, 17 Oct 2018 17:03:25 -0700 Subject: [PATCH 04/13] Make PathComponentParser static and adds a PathExtractor interface PathExtractorImpl is now package private to make easier to add new implementations in the future if necessary --- .../PathComponentParser.java | 11 +- .../com/ionpathextraction/PathExtractor.java | 175 +-------------- .../PathExtractorBuilder.java | 8 +- .../ionpathextraction/PathExtractorImpl.java | 200 ++++++++++++++++++ 4 files changed, 215 insertions(+), 179 deletions(-) create mode 100644 src/main/java/software/amazon/com/ionpathextraction/PathExtractorImpl.java diff --git a/src/main/java/software/amazon/com/ionpathextraction/PathComponentParser.java b/src/main/java/software/amazon/com/ionpathextraction/PathComponentParser.java index 9d5abdc..56b31fa 100644 --- a/src/main/java/software/amazon/com/ionpathextraction/PathComponentParser.java +++ b/src/main/java/software/amazon/com/ionpathextraction/PathComponentParser.java @@ -39,7 +39,10 @@ class PathComponentParser { private static final String WILDCARD_ESCAPE_ANNOTATION = "$ion_extractor_field"; - List parse(final String ionPathExpression) { + // only has static methods, should not be invoked + private PathComponentParser() {} + + static List parse(final String ionPathExpression) { List pathComponents; try (final IonReader reader = newIonReader(ionPathExpression)) { @@ -55,7 +58,7 @@ List parse(final String 
ionPathExpression) { return pathComponents; } - private List readStates(final IonReader reader) { + private static List readStates(final IonReader reader) { final List pathComponents = new ArrayList<>(); while (reader.next() != null) { @@ -81,7 +84,7 @@ private List readStates(final IonReader reader) { return pathComponents; } - private String readIonText(final IonReader reader) { + private static String readIonText(final IonReader reader) { StringBuilder out = new StringBuilder(); try (IonWriter writer = newIonTextWriter(out)) { writer.writeValue(reader); @@ -91,7 +94,7 @@ private String readIonText(final IonReader reader) { return out.toString(); } - private boolean isWildcard(final IonReader reader) { + private static boolean isWildcard(final IonReader reader) { if (reader.stringValue().equals(Wildcard.TEXT)) { final String[] annotations = reader.getTypeAnnotations(); return annotations.length == 0 || !WILDCARD_ESCAPE_ANNOTATION.equals(annotations[0]); diff --git a/src/main/java/software/amazon/com/ionpathextraction/PathExtractor.java b/src/main/java/software/amazon/com/ionpathextraction/PathExtractor.java index 2293111..71a4f51 100644 --- a/src/main/java/software/amazon/com/ionpathextraction/PathExtractor.java +++ b/src/main/java/software/amazon/com/ionpathextraction/PathExtractor.java @@ -13,15 +13,6 @@ package software.amazon.com.ionpathextraction; -import static software.amazon.com.ionpathextraction.utils.Preconditions.checkArgument; -import static software.amazon.com.ionpathextraction.utils.Preconditions.checkState; - -import java.util.ArrayDeque; -import java.util.ArrayList; -import java.util.Deque; -import java.util.List; -import java.util.function.Function; -import software.amazon.com.ionpathextraction.pathcomponents.PathComponent; import software.amazon.ion.IonReader; /** @@ -36,30 +27,12 @@ * encoded in binary Ion, the resulting skip is a seek forward in the input stream, which is inexpensive relative to the * cost of parsing (and in the case of a DOM, 
materializing) the skipped value. *

+ * + *

+ * WARNING: Implementations of this interface are not required to be thread-safe

*/ -public class PathExtractor { - - private final PathExtractorConfig config; - private final Tracker tracker; - - private final List searchPaths; - private final List> callbacks; - - PathExtractor(final List searchPaths, - final List> callbacks, - final PathExtractorConfig config) { - - this.searchPaths = searchPaths; - this.callbacks = callbacks; - this.config = config; - - int size = searchPaths.stream() - .mapToInt(SearchPath::getId) - .max() - .orElse(0); - - tracker = new Tracker(size); - } +public interface PathExtractor { /** * Iterates over the reader looking for registered search paths, when a match is found invokes the respective @@ -67,141 +40,5 @@ public class PathExtractor { * * @param reader {@link IonReader} to process. */ - public void match(final IonReader reader) { - checkArgument(reader.getDepth() == 0 || config.isMatchRelativePaths(), - "reader must be at depth zero, it was at:" + reader.getDepth()); - - // marks all search paths as active - tracker.reset(searchPaths); - matchRecursive(reader); - } - - private int matchRecursive(final IonReader reader) { - final int currentDepth = tracker.getCurrentDepth(); - int ordinal = 0; - - while (reader.next() != null) { - // will continue to next depth - final List partialMatches = new ArrayList<>(); - - boolean hasTerminalMatch = false; - for (SearchPath sp : tracker.activePaths()) { - boolean match = pathComponentMatches(sp, reader, ordinal); - boolean isTerminal = isTerminal(sp); - - if (match && isTerminal) { - hasTerminalMatch = true; - int stepOutTimes = invokeCallback(reader, sp); - if (stepOutTimes > 0) { - return stepOutTimes - 1; - } - } - - if (!isTerminal) { - // all non terminal paths are partial pathComponentMatches at depth zero - if (currentDepth == 0) { - partialMatches.add(sp); - } else if (match) { - partialMatches.add(sp); - } - } - } - - if (needsToStepIn(reader, hasTerminalMatch)) { - tracker.push(partialMatches); - reader.stepIn(); - int stepOutTimes = matchRecursive(reader); - 
reader.stepOut(); - tracker.pop(); - - if (stepOutTimes > 0) { - return stepOutTimes - 1; - } - } - - ordinal += 1; - } - - return 0; - } - - private int invokeCallback(final IonReader reader, final SearchPath searchPath) { - int previousReaderDepth = reader.getDepth(); - int stepOutTimes = callbacks.get(searchPath.getId()).apply(reader); - int newReaderDepth = reader.getDepth(); - - checkState(previousReaderDepth == newReaderDepth, - "Reader must be at same depth when returning from callbacks. initial: " - + previousReaderDepth - + ", new: " - + newReaderDepth); - - return stepOutTimes; - } - - private boolean needsToStepIn(final IonReader reader, final boolean hasTerminalMatches) { - if (tracker.getCurrentDepth() == 0 && hasTerminalMatches) { - return false; - } - - switch (reader.getType()) { - case LIST: - case SEXP: - case STRUCT: - case DATAGRAM: - return true; - } - - return false; - } - - private boolean pathComponentMatches(final SearchPath searchPath, - final IonReader reader, - final int currentPosition) { - // depth 0 can only match the empty search path: () - int depth = tracker.getCurrentDepth(); - List pathComponents = searchPath.getPathComponents(); - - if (depth == 0) { - return pathComponents.isEmpty(); - } else if (depth <= pathComponents.size()) { - return pathComponents.get(depth - 1).matches(reader, currentPosition, config); - } - - return false; - } - - private boolean isTerminal(final SearchPath searchPath) { - return tracker.getCurrentDepth() == searchPath.getPathComponents().size(); - } - - private static class Tracker { - - private final Deque> stack; - - Tracker(final int size) { - stack = new ArrayDeque<>(size); - } - - void reset(final List searchPaths) { - stack.clear(); - stack.push(searchPaths); - } - - List activePaths() { - return stack.peek(); - } - - int getCurrentDepth() { - return stack.size() - 1; - } - - void push(final List partialMatches) { - stack.push(partialMatches); - } - - void pop() { - stack.pop(); - } - } + void 
match(final IonReader reader); } diff --git a/src/main/java/software/amazon/com/ionpathextraction/PathExtractorBuilder.java b/src/main/java/software/amazon/com/ionpathextraction/PathExtractorBuilder.java index 78f516f..da0e478 100644 --- a/src/main/java/software/amazon/com/ionpathextraction/PathExtractorBuilder.java +++ b/src/main/java/software/amazon/com/ionpathextraction/PathExtractorBuilder.java @@ -32,8 +32,6 @@ public final class PathExtractorBuilder { private final List> callbacks = new ArrayList<>(); private boolean matchRelativePaths; private boolean matchCaseInsensitive; - private PathComponentParser compiler; - private PathExtractorBuilder() { } @@ -48,8 +46,6 @@ public static PathExtractorBuilder standard() { builder.matchCaseInsensitive = DEFAULT_CASE_INSENSITIVE; builder.matchRelativePaths = DEFAULT_MATCH_RELATIVE_PATHS; - builder.compiler = new PathComponentParser(); - return builder; } @@ -59,7 +55,7 @@ public static PathExtractorBuilder standard() { * @return new {@link PathExtractor} instance. */ public PathExtractor build() { - return new PathExtractor( + return new PathExtractorImpl( searchPaths, callbacks, new PathExtractorConfig(matchRelativePaths, matchCaseInsensitive) @@ -110,7 +106,7 @@ public PathExtractorBuilder register(final String searchExpressionAsIon, final Function callback) { checkArgument(searchExpressionAsIon != null, "searchExpressionAsIon cannot be null"); - List pathComponents = compiler.parse(searchExpressionAsIon); + List pathComponents = PathComponentParser.parse(searchExpressionAsIon); register(pathComponents, callback); return this; diff --git a/src/main/java/software/amazon/com/ionpathextraction/PathExtractorImpl.java b/src/main/java/software/amazon/com/ionpathextraction/PathExtractorImpl.java new file mode 100644 index 0000000..e1a7659 --- /dev/null +++ b/src/main/java/software/amazon/com/ionpathextraction/PathExtractorImpl.java @@ -0,0 +1,200 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+ * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at: + * + * http://aws.amazon.com/apache2.0/ + * + * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific + * language governing permissions and limitations under the License. + */ + +package software.amazon.com.ionpathextraction; + +import static software.amazon.com.ionpathextraction.utils.Preconditions.checkArgument; +import static software.amazon.com.ionpathextraction.utils.Preconditions.checkState; + +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Deque; +import java.util.List; +import java.util.function.Function; +import software.amazon.com.ionpathextraction.pathcomponents.PathComponent; +import software.amazon.ion.IonReader; + +/** + *

+ * Default implementation of {@link PathExtractor}. + *

+ *

+ * WARNING: not Thread safe. + *

+ */ +class PathExtractorImpl implements PathExtractor { + + private final PathExtractorConfig config; + private final Tracker tracker; + + private final List searchPaths; + private final List> callbacks; + + /** + * Constructor, should only be invoked by {@link PathExtractorBuilder}. + */ + PathExtractorImpl(final List searchPaths, + final List> callbacks, + final PathExtractorConfig config) { + + this.searchPaths = searchPaths; + this.callbacks = callbacks; + this.config = config; + + int size = searchPaths.stream() + .mapToInt(SearchPath::getId) + .max() + .orElse(0); + + tracker = new Tracker(size); + } + + @Override + public void match(final IonReader reader) { + checkArgument(reader.getDepth() == 0 || config.isMatchRelativePaths(), + "reader must be at depth zero, it was at:" + reader.getDepth()); + + // marks all search paths as active + tracker.reset(searchPaths); + matchRecursive(reader); + } + + private int matchRecursive(final IonReader reader) { + final int currentDepth = tracker.getCurrentDepth(); + int ordinal = 0; + + while (reader.next() != null) { + // will continue to next depth + final List partialMatches = new ArrayList<>(); + + boolean hasTerminalMatch = false; + for (SearchPath sp : tracker.activePaths()) { + boolean match = pathComponentMatches(sp, reader, ordinal); + boolean isTerminal = isTerminal(sp); + + if (match && isTerminal) { + hasTerminalMatch = true; + int stepOutTimes = invokeCallback(reader, sp); + if (stepOutTimes > 0) { + return stepOutTimes - 1; + } + } + + if (!isTerminal) { + // all non terminal paths are partial pathComponentMatches at depth zero + if (currentDepth == 0) { + partialMatches.add(sp); + } else if (match) { + partialMatches.add(sp); + } + } + } + + if (needsToStepIn(reader, hasTerminalMatch)) { + tracker.push(partialMatches); + reader.stepIn(); + int stepOutTimes = matchRecursive(reader); + reader.stepOut(); + tracker.pop(); + + if (stepOutTimes > 0) { + return stepOutTimes - 1; + } + } + + ordinal += 1; + } + 
+ return 0; + } + + private int invokeCallback(final IonReader reader, final SearchPath searchPath) { + int previousReaderDepth = reader.getDepth(); + int stepOutTimes = callbacks.get(searchPath.getId()).apply(reader); + int newReaderDepth = reader.getDepth(); + + checkState(previousReaderDepth == newReaderDepth, + "Reader must be at same depth when returning from callbacks. initial: " + + previousReaderDepth + + ", new: " + + newReaderDepth); + + return stepOutTimes; + } + + private boolean needsToStepIn(final IonReader reader, final boolean hasTerminalMatches) { + if (tracker.getCurrentDepth() == 0 && hasTerminalMatches) { + return false; + } + + switch (reader.getType()) { + case LIST: + case SEXP: + case STRUCT: + case DATAGRAM: + return true; + } + + return false; + } + + private boolean pathComponentMatches(final SearchPath searchPath, + final IonReader reader, + final int currentPosition) { + // depth 0 can only match the empty search path: () + int depth = tracker.getCurrentDepth(); + List pathComponents = searchPath.getPathComponents(); + + if (depth == 0) { + return pathComponents.isEmpty(); + } else if (depth <= pathComponents.size()) { + return pathComponents.get(depth - 1).matches(reader, currentPosition, config); + } + + return false; + } + + private boolean isTerminal(final SearchPath searchPath) { + return tracker.getCurrentDepth() == searchPath.getPathComponents().size(); + } + + private static class Tracker { + + private final Deque> stack; + + Tracker(final int size) { + stack = new ArrayDeque<>(size); + } + + void reset(final List searchPaths) { + stack.clear(); + stack.push(searchPaths); + } + + List activePaths() { + return stack.peek(); + } + + int getCurrentDepth() { + return stack.size() - 1; + } + + void push(final List partialMatches) { + stack.push(partialMatches); + } + + void pop() { + stack.pop(); + } + } +} From 75363745e6531afb1ecee9d4429c3d217dbd89e7 Mon Sep 17 00:00:00 2001 From: barbosf Date: Wed, 17 Oct 2018 17:19:45 -0700 
Subject: [PATCH 05/13] short circuiting zero search paths and test Already worked, but short circuiting makes it clearer and avoids unnecessary work. Also adds missing tests for this use case --- .../amazon/com/ionpathextraction/PathExtractorImpl.java | 5 +++++ src/test/resources/test-cases.ion | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/src/main/java/software/amazon/com/ionpathextraction/PathExtractorImpl.java b/src/main/java/software/amazon/com/ionpathextraction/PathExtractorImpl.java index e1a7659..e7fff83 100644 --- a/src/main/java/software/amazon/com/ionpathextraction/PathExtractorImpl.java +++ b/src/main/java/software/amazon/com/ionpathextraction/PathExtractorImpl.java @@ -64,6 +64,11 @@ public void match(final IonReader reader) { checkArgument(reader.getDepth() == 0 || config.isMatchRelativePaths(), "reader must be at depth zero, it was at:" + reader.getDepth()); + // short circuit when there are zero SearchPaths + if(searchPaths.isEmpty()) { + return; + } + // marks all search paths as active tracker.reset(searchPaths); matchRecursive(reader); diff --git a/src/test/resources/test-cases.ion b/src/test/resources/test-cases.ion index 75e2263..ed177ff 100644 --- a/src/test/resources/test-cases.ion +++ b/src/test/resources/test-cases.ion @@ -1,3 +1,10 @@ +// zero search paths --------------------------------------------------------------------- +// no-op extractor, data doesn't matter +{ searchPath: [], expected: [], data: {foo: 1} } +{ searchPath: [], expected: [], data: (3 4) } +{ searchPath: [], expected: [], data: 99 } +{ searchPath: [], expected: [], data: [1, 2] } + // Field only ---------------------------------------------------------------------------- // matches From ec97718e123c976446b5f05117c9d05b4b8e1cf3 Mon Sep 17 00:00:00 2001 From: barbosf Date: Wed, 17 Oct 2018 17:38:43 -0700 Subject: [PATCH 06/13] Bugfix: fails when the callback return is higher than the reader relative depth --- .../PathExtractorBuilder.java | 39
+++++++++++-------- .../ionpathextraction/PathExtractorImpl.java | 27 +++++++++++-- .../ionpathextraction/PathExtractorTest.kt | 34 ++++++++++++++++ 3 files changed, 80 insertions(+), 20 deletions(-) diff --git a/src/main/java/software/amazon/com/ionpathextraction/PathExtractorBuilder.java b/src/main/java/software/amazon/com/ionpathextraction/PathExtractorBuilder.java index da0e478..1493eeb 100644 --- a/src/main/java/software/amazon/com/ionpathextraction/PathExtractorBuilder.java +++ b/src/main/java/software/amazon/com/ionpathextraction/PathExtractorBuilder.java @@ -115,28 +115,33 @@ public PathExtractorBuilder register(final String searchExpressionAsIon, /** * Register a callback for a search path. *

- * The callback receives the matcher's {@link IonReader}, positioned on the matching value, so that it can use the - * appropriate reader method to access the value. The callback return value is as a ‘step-out-N’ instruction. - * The most common value is zero, which tells the extractor to continue with the next value at the same depth. - * A return value greater than zero may be useful to users who only care about the first match at a particular - * depth + * The callback receives the matcher's {@link IonReader}, positioned on the matching value, so that it can use + * the appropriate reader method to access the value. The callback return value is as a ‘step-out-N’ + * instruction. The most common value is zero, which tells the extractor to continue with the next value at the + * same depth. A return value greater than zero may be useful to users who only care about the first match at a + * particular depth *

+ * *

- * Callback implementations MUST comply with the following: + * Callback implementations MUST comply with the following: *

+ * *
    - *
  • - * The reader must not be advanced past the matching value. Violating this will cause the following value to be - * skipped. If a value is skipped, neither the value itself nor any of its children will be checked for match - * against any of the extractor's registered paths. - *
  • - *
  • - * If the reader is positioned on a container value, its cursor must be at the same depth when the callback returns. - * In other words, if the user steps in to the matched value, it must step out an equal number of times. Violating - * this will raise an error. - *
  • + *
  • + * The reader must not be advanced past the matching value. Violating this will cause the following value to + * be skipped. If a value is skipped, neither the value itself nor any of its children will be checked for + * match against any of the extractor's registered paths. + *
  • + *
  • + * If the reader is positioned on a container value, its cursor must be at the same depth when the callback + * returns. In other words, if the user steps in to the matched value, it must step out an equal number of + * times. Violating this will raise an error. + *
  • + *
  • + * Return value must be between zero and the current reader relative depth; for example, a callback for the + * search path (foo bar) must return a value between 0 and 2 inclusive. + *
  • *
- *

* * @param pathComponents search path as a list of path components. * @param callback callback to be registered. diff --git a/src/main/java/software/amazon/com/ionpathextraction/PathExtractorImpl.java b/src/main/java/software/amazon/com/ionpathextraction/PathExtractorImpl.java index e7fff83..8908b38 100644 --- a/src/main/java/software/amazon/com/ionpathextraction/PathExtractorImpl.java +++ b/src/main/java/software/amazon/com/ionpathextraction/PathExtractorImpl.java @@ -26,10 +26,10 @@ /** *

- * Default implementation of {@link PathExtractor}. + * Default implementation of {@link PathExtractor}. *

*

- * WARNING: not Thread safe. + * WARNING: not Thread safe. *

*/ class PathExtractorImpl implements PathExtractor { @@ -65,12 +65,14 @@ public void match(final IonReader reader) { "reader must be at depth zero, it was at:" + reader.getDepth()); // short circuit when there are zero SearchPaths - if(searchPaths.isEmpty()) { + if (searchPaths.isEmpty()) { return; } // marks all search paths as active tracker.reset(searchPaths); + tracker.setInitialReaderDepth(reader.getDepth()); + matchRecursive(reader); } @@ -134,6 +136,16 @@ private int invokeCallback(final IonReader reader, final SearchPath searchPath) + ", new: " + newReaderDepth); + // we don't allow users to step out the initial reader depth + int readerRelativeDepth = reader.getDepth() - tracker.getInitialReaderDepth(); + + checkState(stepOutTimes <= readerRelativeDepth, + "Callback return cannot be greater than the reader current relative depth." + + " return: " + + stepOutTimes + + ", relative reader depth: " + + readerRelativeDepth); + return stepOutTimes; } @@ -176,6 +188,7 @@ private boolean isTerminal(final SearchPath searchPath) { private static class Tracker { private final Deque> stack; + private int initialReaderDepth; Tracker(final int size) { stack = new ArrayDeque<>(size); @@ -201,5 +214,13 @@ void push(final List partialMatches) { void pop() { stack.pop(); } + + void setInitialReaderDepth(final int depth) { + initialReaderDepth = depth; + } + + public int getInitialReaderDepth() { + return initialReaderDepth; + } } } diff --git a/src/test/kotlin/software/amazon/com/ionpathextraction/PathExtractorTest.kt b/src/test/kotlin/software/amazon/com/ionpathextraction/PathExtractorTest.kt index 947efa6..2816cd6 100644 --- a/src/test/kotlin/software/amazon/com/ionpathextraction/PathExtractorTest.kt +++ b/src/test/kotlin/software/amazon/com/ionpathextraction/PathExtractorTest.kt @@ -156,6 +156,40 @@ class PathExtractorTest { assertEquals(ION.singleValue("[1,2,3]"), out) } + @Test + fun stepOutMoreThanPermitted() { + val extractor = PathExtractorBuilder.standard() + 
.register("(foo)") { _ -> 200 } + .build() + + val exception = assertThrows { + extractor.match(ION.newReader("{foo: 1}")) + } + + assertEquals("Callback return cannot be greater than the reader current relative depth. " + + "return: 200, relative reader depth: 1", exception.message) + } + + @Test + fun stepOutMoreThanPermittedWithRelative() { + val extractor = PathExtractorBuilder.standard() + .withMatchRelativePaths(true) + // even though you could step out twice in reader you can't given the initial reader depth + .register("(bar)") { _ -> 2 } + .build() + + val newReader = ION.newReader("{foo: {bar: 1}}") + newReader.next() + newReader.stepIn() // positioned at the beginning of {bar: 1} + + val exception = assertThrows { + extractor.match(newReader) + } + + assertEquals("Callback return cannot be greater than the reader current relative depth. return: 2, " + + "relative reader depth: 1", exception.message) + } + // Invalid configuration ----------------------------------------------------------------------------- @Test From 90ab6dd41b2280f0293864cff8ed86b6973d77f0 Mon Sep 17 00:00:00 2001 From: barbosf Date: Wed, 17 Oct 2018 17:53:14 -0700 Subject: [PATCH 07/13] fixing initial Tracker size and some other spelling and missing comments --- .../com/ionpathextraction/PathExtractorBuilder.java | 8 ++++---- .../amazon/com/ionpathextraction/PathExtractorImpl.java | 6 +++--- .../software/amazon/com/ionpathextraction/SearchPath.java | 6 ------ 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/src/main/java/software/amazon/com/ionpathextraction/PathExtractorBuilder.java b/src/main/java/software/amazon/com/ionpathextraction/PathExtractorBuilder.java index 1493eeb..1e30a8d 100644 --- a/src/main/java/software/amazon/com/ionpathextraction/PathExtractorBuilder.java +++ b/src/main/java/software/amazon/com/ionpathextraction/PathExtractorBuilder.java @@ -116,10 +116,10 @@ public PathExtractorBuilder register(final String searchExpressionAsIon, * Register a callback 
for a search path. *

* The callback receives the matcher's {@link IonReader}, positioned on the matching value, so that it can use - * the appropriate reader method to access the value. The callback return value is as a ‘step-out-N’ - * instruction. The most common value is zero, which tells the extractor to continue with the next value at the - * same depth. A return value greater than zero may be useful to users who only care about the first match at a - * particular depth + * the appropriate reader method to access the value. The callback return value is a ‘step-out-N’ instruction. + * The most common value is zero, which tells the extractor to continue with the next value at the same depth. A + * return value greater than zero may be useful to users who only care about the first match at a particular + * depth. *

* *

diff --git a/src/main/java/software/amazon/com/ionpathextraction/PathExtractorImpl.java b/src/main/java/software/amazon/com/ionpathextraction/PathExtractorImpl.java index 8908b38..9321a95 100644 --- a/src/main/java/software/amazon/com/ionpathextraction/PathExtractorImpl.java +++ b/src/main/java/software/amazon/com/ionpathextraction/PathExtractorImpl.java @@ -51,12 +51,12 @@ class PathExtractorImpl implements PathExtractor { this.callbacks = callbacks; this.config = config; - int size = searchPaths.stream() - .mapToInt(SearchPath::getId) + int maxSearchPathDepth = searchPaths.stream() + .mapToInt(sp -> sp.getPathComponents().size()) .max() .orElse(0); - tracker = new Tracker(size); + tracker = new Tracker(maxSearchPathDepth); } @Override diff --git a/src/main/java/software/amazon/com/ionpathextraction/SearchPath.java b/src/main/java/software/amazon/com/ionpathextraction/SearchPath.java index 29bdbda..3ff5194 100644 --- a/src/main/java/software/amazon/com/ionpathextraction/SearchPath.java +++ b/src/main/java/software/amazon/com/ionpathextraction/SearchPath.java @@ -13,14 +13,8 @@ package software.amazon.com.ionpathextraction; -import static software.amazon.com.ionpathextraction.utils.Preconditions.checkArgument; - import java.util.List; -import java.util.function.Function; import software.amazon.com.ionpathextraction.pathcomponents.PathComponent; -import software.amazon.ion.IonReader; - -// FIXME this is a weird class, probably better to remove it /** * A path which is provided to the extractor for matching. 
From 9788b3a7ace2ad3beec429fcde15675a90ac992a Mon Sep 17 00:00:00 2001 From: barbosf Date: Wed, 17 Oct 2018 17:55:30 -0700 Subject: [PATCH 08/13] renaming builder's register methods to withSearchPath --- .../PathExtractorBuilder.java | 20 +++++++------- .../ionpathextraction/PathExtractorTest.kt | 26 +++++++++---------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/main/java/software/amazon/com/ionpathextraction/PathExtractorBuilder.java b/src/main/java/software/amazon/com/ionpathextraction/PathExtractorBuilder.java index 1e30a8d..0b71516 100644 --- a/src/main/java/software/amazon/com/ionpathextraction/PathExtractorBuilder.java +++ b/src/main/java/software/amazon/com/ionpathextraction/PathExtractorBuilder.java @@ -97,17 +97,17 @@ public PathExtractorBuilder withMatchCaseInsensitive(final boolean matchCaseInse /** * Register a callback for a search path. * - * @param searchExpressionAsIon string representation of a search path. + * @param searchPathAsIon string representation of a search path. * @param callback callback to be registered. * @return builder for chaining. - * @see PathExtractorBuilder#register(List, Function) + * @see PathExtractorBuilder#withSearchPath(List, Function) */ - public PathExtractorBuilder register(final String searchExpressionAsIon, - final Function callback) { - checkArgument(searchExpressionAsIon != null, "searchExpressionAsIon cannot be null"); + public PathExtractorBuilder withSearchPath(final String searchPathAsIon, + final Function callback) { + checkArgument(searchPathAsIon != null, "searchPathAsIon cannot be null"); - List pathComponents = PathComponentParser.parse(searchExpressionAsIon); - register(pathComponents, callback); + List pathComponents = PathComponentParser.parse(searchPathAsIon); + withSearchPath(pathComponents, callback); return this; } @@ -146,10 +146,10 @@ public PathExtractorBuilder register(final String searchExpressionAsIon, * @param pathComponents search path as a list of path components. 
* @param callback callback to be registered. * @return builder for chaining. - * @see PathExtractorBuilder#register(String, Function) + * @see PathExtractorBuilder#withSearchPath(String, Function) */ - public PathExtractorBuilder register(final List pathComponents, - final Function callback) { + public PathExtractorBuilder withSearchPath(final List pathComponents, + final Function callback) { checkArgument(pathComponents != null, "pathComponents cannot be null"); checkArgument(callback != null, "callback cannot be null"); diff --git a/src/test/kotlin/software/amazon/com/ionpathextraction/PathExtractorTest.kt b/src/test/kotlin/software/amazon/com/ionpathextraction/PathExtractorTest.kt index 2816cd6..8925f44 100644 --- a/src/test/kotlin/software/amazon/com/ionpathextraction/PathExtractorTest.kt +++ b/src/test/kotlin/software/amazon/com/ionpathextraction/PathExtractorTest.kt @@ -80,7 +80,7 @@ class PathExtractorTest { val out = ION.newEmptyList() val builder = PathExtractorBuilder.standard() - testCase.searchPaths.forEach { builder.register(it, collectToIonList(out, testCase.stepOutNumber)) } + testCase.searchPaths.forEach { builder.withSearchPath(it, collectToIonList(out, testCase.stepOutNumber)) } val extractor = builder.build() extractor.match(ION.newReader(testCase.data)) @@ -94,11 +94,11 @@ class PathExtractorTest { var timesCallback2Called = 0 val extractor = PathExtractorBuilder.standard() - .register("(foo)") { + .withSearchPath("(foo)") { timesCallback1Called++ 0 } - .register("(bar)") { + .withSearchPath("(bar)") { timesCallback2Called++ 0 } @@ -115,7 +115,7 @@ class PathExtractorTest { @Test fun readerAtInvalidDepth() { val extractor = PathExtractorBuilder.standard() - .register("(foo)") { 0 } + .withSearchPath("(foo)") { 0 } .build() val reader = ION.newReader("[{foo: 1}]") @@ -131,7 +131,7 @@ class PathExtractorTest { val out = ION.newEmptyList() val extractor = PathExtractorBuilder.standard() .withMatchRelativePaths(true) - .register("(foo)", 
collectToIonList(out, 0)) + .withSearchPath("(foo)", collectToIonList(out, 0)) .build() val reader = ION.newReader("[{foo: 1}]") @@ -148,7 +148,7 @@ class PathExtractorTest { val out = ION.newEmptyList() val extractor = PathExtractorBuilder.standard() .withMatchCaseInsensitive(true) - .register("(foo)", collectToIonList(out, 0)) + .withSearchPath("(foo)", collectToIonList(out, 0)) .build() extractor.match(ION.newReader("{FOO: 1}{foo: 2}{fOo: 3}{bar: 4}")) @@ -159,7 +159,7 @@ class PathExtractorTest { @Test fun stepOutMoreThanPermitted() { val extractor = PathExtractorBuilder.standard() - .register("(foo)") { _ -> 200 } + .withSearchPath("(foo)") { _ -> 200 } .build() val exception = assertThrows { @@ -175,7 +175,7 @@ class PathExtractorTest { val extractor = PathExtractorBuilder.standard() .withMatchRelativePaths(true) // even though you could step out twice in reader you can't given the initial reader depth - .register("(bar)") { _ -> 2 } + .withSearchPath("(bar)") { _ -> 2 } .build() val newReader = ION.newReader("{foo: {bar: 1}}") @@ -195,7 +195,7 @@ class PathExtractorTest { @Test fun nullStringPath() { val exception = assertThrows { - PathExtractorBuilder.standard().register(null as String?, emptyCallback) + PathExtractorBuilder.standard().withSearchPath(null as String?, emptyCallback) } assertEquals("searchExpressionAsIon cannot be null", exception.message) @@ -204,7 +204,7 @@ class PathExtractorTest { @Test fun nullListPath() { val exception = assertThrows { - PathExtractorBuilder.standard().register(null as List?, emptyCallback) + PathExtractorBuilder.standard().withSearchPath(null as List?, emptyCallback) } assertEquals("pathComponents cannot be null", exception.message) @@ -213,7 +213,7 @@ class PathExtractorTest { @Test fun nullCallback() { val exception = assertThrows { - PathExtractorBuilder.standard().register("(foo)", null) + PathExtractorBuilder.standard().withSearchPath("(foo)", null) } assertEquals("callback cannot be null", exception.message) @@ 
-222,7 +222,7 @@ class PathExtractorTest { @Test fun emptySearchPath() { val exception = assertThrows { - PathExtractorBuilder.standard().register("", emptyCallback) + PathExtractorBuilder.standard().withSearchPath("", emptyCallback) } assertEquals("ionPathExpression cannot be empty", exception.message) @@ -231,7 +231,7 @@ class PathExtractorTest { @Test fun searchPathNotSexp() { val exception = assertThrows { - PathExtractorBuilder.standard().register("1", emptyCallback) + PathExtractorBuilder.standard().withSearchPath("1", emptyCallback) } assertEquals("ionPathExpression must be a s-expression", exception.message) From 0ef0b5d256318cf7c8a8ac5caa554562c8722e59 Mon Sep 17 00:00:00 2001 From: barbosf Date: Wed, 17 Oct 2018 18:03:37 -0700 Subject: [PATCH 09/13] using IonType.isContainer instead of hand coding container detection --- .../com/ionpathextraction/PathExtractorImpl.java | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/main/java/software/amazon/com/ionpathextraction/PathExtractorImpl.java b/src/main/java/software/amazon/com/ionpathextraction/PathExtractorImpl.java index 9321a95..3fc3de6 100644 --- a/src/main/java/software/amazon/com/ionpathextraction/PathExtractorImpl.java +++ b/src/main/java/software/amazon/com/ionpathextraction/PathExtractorImpl.java @@ -23,6 +23,7 @@ import java.util.function.Function; import software.amazon.com.ionpathextraction.pathcomponents.PathComponent; import software.amazon.ion.IonReader; +import software.amazon.ion.IonType; /** *

@@ -154,15 +155,7 @@ private boolean needsToStepIn(final IonReader reader, final boolean hasTerminalM return false; } - switch (reader.getType()) { - case LIST: - case SEXP: - case STRUCT: - case DATAGRAM: - return true; - } - - return false; + return IonType.isContainer(reader.getType()); } private boolean pathComponentMatches(final SearchPath searchPath, @@ -219,7 +212,7 @@ void setInitialReaderDepth(final int depth) { initialReaderDepth = depth; } - public int getInitialReaderDepth() { + int getInitialReaderDepth() { return initialReaderDepth; } } From ae5525c4a913818ae20fa150da03361b976b6d5e Mon Sep 17 00:00:00 2001 From: barbosf Date: Wed, 17 Oct 2018 18:16:03 -0700 Subject: [PATCH 10/13] adding support for specifying search paths as Ion lists --- .../PathComponentParser.java | 3 +- .../ionpathextraction/PathExtractorTest.kt | 19 +++++--- src/test/resources/test-cases.ion | 45 +++++++++++++++---- 3 files changed, 50 insertions(+), 17 deletions(-) diff --git a/src/main/java/software/amazon/com/ionpathextraction/PathComponentParser.java b/src/main/java/software/amazon/com/ionpathextraction/PathComponentParser.java index 56b31fa..38114d0 100644 --- a/src/main/java/software/amazon/com/ionpathextraction/PathComponentParser.java +++ b/src/main/java/software/amazon/com/ionpathextraction/PathComponentParser.java @@ -47,7 +47,8 @@ static List parse(final String ionPathExpression) { try (final IonReader reader = newIonReader(ionPathExpression)) { checkArgument(reader.next() != null, "ionPathExpression cannot be empty"); - checkArgument(reader.getType() == IonType.SEXP, "ionPathExpression must be a s-expression"); + checkArgument(reader.getType() == IonType.SEXP || reader.getType() == IonType.LIST, + "ionPathExpression must be a s-expression or list"); reader.stepIn(); pathComponents = readStates(reader); diff --git a/src/test/kotlin/software/amazon/com/ionpathextraction/PathExtractorTest.kt b/src/test/kotlin/software/amazon/com/ionpathextraction/PathExtractorTest.kt 
index 8925f44..9ef10db 100644 --- a/src/test/kotlin/software/amazon/com/ionpathextraction/PathExtractorTest.kt +++ b/src/test/kotlin/software/amazon/com/ionpathextraction/PathExtractorTest.kt @@ -19,6 +19,7 @@ import org.junit.jupiter.api.assertAll import org.junit.jupiter.api.assertThrows import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.MethodSource +import software.amazon.com.ionpathextraction.PathExtractorTest.Companion.toText import software.amazon.com.ionpathextraction.exceptions.PathExtractionException import software.amazon.com.ionpathextraction.pathcomponents.PathComponent import software.amazon.ion.* @@ -52,10 +53,14 @@ class PathExtractorTest { ION.loader.load(File("src/test/resources/test-cases.ion")) .map { it as IonStruct } .map { struct -> - val searchPathIonValue = struct["searchPath"] - val searchPaths = when (searchPathIonValue) { - is IonList -> searchPathIonValue.map { it.toText() } - else -> listOf(searchPathIonValue.toText()) + + // single + val searchPaths = if(struct.containsKey("searchPath")) { + listOf(struct["searchPath"].toText()) + } + // multiple + else { + (struct["searchPaths"] as IonSequence).map { it.toText() } } TestCase( @@ -198,7 +203,7 @@ class PathExtractorTest { PathExtractorBuilder.standard().withSearchPath(null as String?, emptyCallback) } - assertEquals("searchExpressionAsIon cannot be null", exception.message) + assertEquals("searchPathAsIon cannot be null", exception.message) } @Test @@ -229,11 +234,11 @@ class PathExtractorTest { } @Test - fun searchPathNotSexp() { + fun searchPathNotSequence() { val exception = assertThrows { PathExtractorBuilder.standard().withSearchPath("1", emptyCallback) } - assertEquals("ionPathExpression must be a s-expression", exception.message) + assertEquals("ionPathExpression must be a s-expression or list", exception.message) } } \ No newline at end of file diff --git a/src/test/resources/test-cases.ion b/src/test/resources/test-cases.ion index 
ed177ff..1cdb7a4 100644 --- a/src/test/resources/test-cases.ion +++ b/src/test/resources/test-cases.ion @@ -1,14 +1,38 @@ +/* + +Test case spec: + +single search path: +{ + searchPath: , + data: , + expected: +} + +multiple search paths: +{ + searchPaths: , + data: , + expected: +} + +Only difference is that for multiple the searchPath key is pluralized to searchPaths and expects an Ion sequence of +search paths + +*/ + // zero search paths --------------------------------------------------------------------- // no-op extractor, data doesn't matter -{ searchPath: [], expected: [], data: {foo: 1} } -{ searchPath: [], expected: [], data: (3 4) } -{ searchPath: [], expected: [], data: 99 } -{ searchPath: [], expected: [], data: [1, 2] } +{ searchPaths: [], expected: [], data: {foo: 1} } +{ searchPaths: [], expected: [], data: (3 4) } +{ searchPaths: [], expected: [], data: 99 } +{ searchPaths: [], expected: [], data: [1, 2] } // Field only ---------------------------------------------------------------------------- // matches { searchPath: (foo), data: {foo: 1}, expected: [1] } +{ searchPath: [foo], data: {foo: 1}, expected: [1] } { searchPath: (foo bar), data: {foo: {bar : 2}}, expected: [2] } // escaped wildcard @@ -56,6 +80,7 @@ // matches { searchPath: (0), data: [1], expected: [1] } +{ searchPath: [0], data: [1], expected: [1] } { searchPath: (0), data: (1), expected: [1] } { searchPath: (0), data: {f: 1}, expected: [1] } { searchPath: (1), data: [1, 2], expected: [2] } @@ -92,6 +117,7 @@ // matches { searchPath: (*), data: [1], expected: [1] } +{ searchPath: ['*'], data: [1], expected: [1] } { searchPath: (*), data: (1), expected: [1] } { searchPath: (*), data: {f: 1}, expected: [1] } { searchPath: (*), data: [1, 2], expected: [1, 2] } @@ -137,6 +163,7 @@ // containers { searchPath: (), data: [1], expected: [[1]] } +{ searchPath: [], data: [1], expected: [[1]] } { searchPath: (), data: (1), expected: [(1)] } { searchPath: (), data: {foo: 1}, expected: [{foo: 1}] 
} @@ -165,7 +192,7 @@ expected: [1, 2, 3] } { - searchPath: (foo *), + searchPath: [foo, '*'], data: { foo: [1], foo: (2), foo: {bar: 3}, foo: 1, bar: (9) }, expected: [1, 2, 3] } @@ -208,12 +235,12 @@ // Multiple search paths ----------------------------------------------------------------- // all match -{ searchPath: [(0), (foo)], data: {bar: 1, foo: 2}, expected: [1, 2] } +{ searchPaths: [(0), (foo)], data: {bar: 1, foo: 2}, expected: [1, 2] } // none match -{ searchPath: [(1), (foo)], data: [0], expected: [] } +{ searchPaths: [(1), [foo]], data: [0], expected: [] } // multiple matchers match the same value -{ searchPath: [(1), (*)], data: [1, 2, 3], expected: [1, 2, 2, 3] } +{ searchPaths: [(1), (*)], data: [1, 2, 3], expected: [1, 2, 2, 3] } -{ searchPath: [(foo 1), (foo 2)], data: {foo: [0, 1, 2]}, expected: [1, 2] } \ No newline at end of file +{ searchPaths: [(foo 1), (foo 2)], data: {foo: [0, 1, 2]}, expected: [1, 2] } \ No newline at end of file From 1fd93984c6c6c4030e679dd01c6cacbd2f8ea145 Mon Sep 17 00:00:00 2001 From: barbosf Date: Thu, 18 Oct 2018 10:32:34 -0700 Subject: [PATCH 11/13] Addressing multiple PR comments * Moving callback to SearchPath * Simplifying partial match detection logic * Fixing needToStepIn logic * Adding tests for nested search paths --- .../PathExtractorBuilder.java | 15 ++++----- .../ionpathextraction/PathExtractorImpl.java | 28 +++------------- .../com/ionpathextraction/SearchPath.java | 16 +++++---- .../ionpathextraction/PathExtractorTest.kt | 33 +++++++++++++++++-- 4 files changed, 52 insertions(+), 40 deletions(-) diff --git a/src/main/java/software/amazon/com/ionpathextraction/PathExtractorBuilder.java b/src/main/java/software/amazon/com/ionpathextraction/PathExtractorBuilder.java index 0b71516..75f7766 100644 --- a/src/main/java/software/amazon/com/ionpathextraction/PathExtractorBuilder.java +++ b/src/main/java/software/amazon/com/ionpathextraction/PathExtractorBuilder.java @@ -29,7 +29,6 @@ public final class 
PathExtractorBuilder { private static final boolean DEFAULT_MATCH_RELATIVE_PATHS = false; private static final boolean DEFAULT_CASE_INSENSITIVE = false; private final List searchPaths = new ArrayList<>(); - private final List> callbacks = new ArrayList<>(); private boolean matchRelativePaths; private boolean matchCaseInsensitive; @@ -55,11 +54,7 @@ public static PathExtractorBuilder standard() { * @return new {@link PathExtractor} instance. */ public PathExtractor build() { - return new PathExtractorImpl( - searchPaths, - callbacks, - new PathExtractorConfig(matchRelativePaths, matchCaseInsensitive) - ); + return new PathExtractorImpl(searchPaths, new PathExtractorConfig(matchRelativePaths, matchCaseInsensitive)); } /** @@ -141,6 +136,11 @@ public PathExtractorBuilder withSearchPath(final String searchPathAsIon, * Return value must be between zero and the the current reader relative depth, for example the following * search path (foo bar) must return values between 0 and 2 inclusive. * + *

  • + * When there are nested search paths, e.g. (foo) and (foo bar), the callback for (foo) should not read the + * reader value if it's a container. Doing so will advance the reader to the end of the container making + * it impossible to match (foo bar). + *
  • * * * @param pathComponents search path as a list of path components. @@ -153,8 +153,7 @@ public PathExtractorBuilder withSearchPath(final List pathCompone checkArgument(pathComponents != null, "pathComponents cannot be null"); checkArgument(callback != null, "callback cannot be null"); - searchPaths.add(new SearchPath(searchPaths.size(), pathComponents)); - callbacks.add(callback); + searchPaths.add(new SearchPath(pathComponents, callback)); return this; } diff --git a/src/main/java/software/amazon/com/ionpathextraction/PathExtractorImpl.java b/src/main/java/software/amazon/com/ionpathextraction/PathExtractorImpl.java index 3fc3de6..3505991 100644 --- a/src/main/java/software/amazon/com/ionpathextraction/PathExtractorImpl.java +++ b/src/main/java/software/amazon/com/ionpathextraction/PathExtractorImpl.java @@ -20,7 +20,6 @@ import java.util.ArrayList; import java.util.Deque; import java.util.List; -import java.util.function.Function; import software.amazon.com.ionpathextraction.pathcomponents.PathComponent; import software.amazon.ion.IonReader; import software.amazon.ion.IonType; @@ -39,17 +38,14 @@ class PathExtractorImpl implements PathExtractor { private final Tracker tracker; private final List searchPaths; - private final List> callbacks; /** * Constructor, should only be invoked by {@link PathExtractorBuilder}. 
*/ PathExtractorImpl(final List searchPaths, - final List> callbacks, final PathExtractorConfig config) { this.searchPaths = searchPaths; - this.callbacks = callbacks; this.config = config; int maxSearchPathDepth = searchPaths.stream() @@ -85,30 +81,24 @@ private int matchRecursive(final IonReader reader) { // will continue to next depth final List partialMatches = new ArrayList<>(); - boolean hasTerminalMatch = false; for (SearchPath sp : tracker.activePaths()) { boolean match = pathComponentMatches(sp, reader, ordinal); boolean isTerminal = isTerminal(sp); if (match && isTerminal) { - hasTerminalMatch = true; int stepOutTimes = invokeCallback(reader, sp); if (stepOutTimes > 0) { return stepOutTimes - 1; } } - if (!isTerminal) { - // all non terminal paths are partial pathComponentMatches at depth zero - if (currentDepth == 0) { - partialMatches.add(sp); - } else if (match) { - partialMatches.add(sp); - } + // all non terminal paths are partial matches at depth zero + if (!isTerminal && (currentDepth == 0 || match)) { + partialMatches.add(sp); } } - if (needsToStepIn(reader, hasTerminalMatch)) { + if (IonType.isContainer(reader.getType()) && !partialMatches.isEmpty()) { tracker.push(partialMatches); reader.stepIn(); int stepOutTimes = matchRecursive(reader); @@ -128,7 +118,7 @@ private int matchRecursive(final IonReader reader) { private int invokeCallback(final IonReader reader, final SearchPath searchPath) { int previousReaderDepth = reader.getDepth(); - int stepOutTimes = callbacks.get(searchPath.getId()).apply(reader); + int stepOutTimes = searchPath.getCallback().apply(reader); int newReaderDepth = reader.getDepth(); checkState(previousReaderDepth == newReaderDepth, @@ -150,14 +140,6 @@ private int invokeCallback(final IonReader reader, final SearchPath searchPath) return stepOutTimes; } - private boolean needsToStepIn(final IonReader reader, final boolean hasTerminalMatches) { - if (tracker.getCurrentDepth() == 0 && hasTerminalMatches) { - return false; - } 
- - return IonType.isContainer(reader.getType()); - } - private boolean pathComponentMatches(final SearchPath searchPath, final IonReader reader, final int currentPosition) { diff --git a/src/main/java/software/amazon/com/ionpathextraction/SearchPath.java b/src/main/java/software/amazon/com/ionpathextraction/SearchPath.java index 3ff5194..29f29bd 100644 --- a/src/main/java/software/amazon/com/ionpathextraction/SearchPath.java +++ b/src/main/java/software/amazon/com/ionpathextraction/SearchPath.java @@ -14,26 +14,28 @@ package software.amazon.com.ionpathextraction; import java.util.List; +import java.util.function.Function; import software.amazon.com.ionpathextraction.pathcomponents.PathComponent; +import software.amazon.ion.IonReader; /** * A path which is provided to the extractor for matching. */ class SearchPath { - private final int id; private final List pathComponents; + private final Function callback; - SearchPath(final int id, final List pathComponents) { - this.id = id; + SearchPath(final List pathComponents, final Function callback) { this.pathComponents = pathComponents; - } - - int getId() { - return id; + this.callback = callback; } List getPathComponents() { return pathComponents; } + + public Function getCallback() { + return callback; + } } diff --git a/src/test/kotlin/software/amazon/com/ionpathextraction/PathExtractorTest.kt b/src/test/kotlin/software/amazon/com/ionpathextraction/PathExtractorTest.kt index 9ef10db..96d0e62 100644 --- a/src/test/kotlin/software/amazon/com/ionpathextraction/PathExtractorTest.kt +++ b/src/test/kotlin/software/amazon/com/ionpathextraction/PathExtractorTest.kt @@ -13,13 +13,13 @@ package software.amazon.com.ionpathextraction +import org.junit.Assert import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.Test import org.junit.jupiter.api.assertAll import org.junit.jupiter.api.assertThrows import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.MethodSource 
-import software.amazon.com.ionpathextraction.PathExtractorTest.Companion.toText import software.amazon.com.ionpathextraction.exceptions.PathExtractionException import software.amazon.com.ionpathextraction.pathcomponents.PathComponent import software.amazon.ion.* @@ -55,7 +55,7 @@ class PathExtractorTest { .map { struct -> // single - val searchPaths = if(struct.containsKey("searchPath")) { + val searchPaths = if (struct.containsKey("searchPath")) { listOf(struct["searchPath"].toText()) } // multiple @@ -195,6 +195,35 @@ class PathExtractorTest { "relative reader depth: 1", exception.message) } + @Test + fun nestedSearchPaths() { + // Test only that the correct callbacks were called as reading the value for (foo) + // will advance the reader making (foo bar) not match + + val counter = mutableMapOf( + "()" to 0, + "(foo)" to 0, + "(foo bar)" to 0 + ) + + val extractor = PathExtractorBuilder.standard().apply { + counter.forEach { sp, _ -> + withSearchPath(sp) { _ -> + counter[sp] = counter[sp]!! + 1 + 0 + } + } + }.build() + + + extractor.match(ION.newReader("{foo: {bar: 1}}")) + + assertEquals(3, counter.size) + assertEquals(1, counter["()"]) + assertEquals(1, counter["(foo)"]) + assertEquals(1, counter["(foo bar)"]) + } + // Invalid configuration ----------------------------------------------------------------------------- @Test From 9697962e21c9b355e168cfdea94ffa2e3620adf5 Mon Sep 17 00:00:00 2001 From: barbosf Date: Thu, 18 Oct 2018 12:45:49 -0700 Subject: [PATCH 12/13] Changing README example to not depend on IonSystem Also added a test for the example to make sure it works --- README.md | 23 ++++---- .../com/ionpathextraction/ExampleTest.java | 55 +++++++++++++++++++ 2 files changed, 65 insertions(+), 13 deletions(-) create mode 100644 src/test/kotlin/software/amazon/com/ionpathextraction/ExampleTest.java diff --git a/README.md b/README.md index fa67fb5..8be8a6a 100644 --- a/README.md +++ b/README.md @@ -56,31 +56,28 @@ information on the callback contract. 
```java // Capture all matched values into a List -final IonSystem ion = IonSystemBuilder.standard().build(); - -final List list = new ArrayList<>(); +final List list = new ArrayList<>(); final Function callback = (reader) -> { - IonValue ionValue = ion.newValue(reader); - list.add(ionValue); + list.add(reader.intValue()); return 0; }; final PathExtractor pathExtractor = PathExtractorBuilder.standard() - .register("(foo)", callback) - .register("(bar)", callback) - .register("(baz 1)", callback) + .withSearchPath("(foo)", callback) + .withSearchPath("(bar)", callback) + .withSearchPath("(baz 1)", callback) .build(); -IonReader ionReader = ion.newReader("{foo: 1}" - + "{bar: 2}" - + "{baz: [10,20,30,40]}" - + "{other: 99}" +final IonReader ionReader = IonReaderBuilder.standard().build("{foo: 1}" + + "{bar: 2}" + + "{baz: [10,20,30,40]}" + + "{other: 99}" ); pathExtractor.match(ionReader); -// list will contain 1, 2 and 20 +assertEquals("[1, 2, 20]", list.toString()); ``` ## Ion Developer information diff --git a/src/test/kotlin/software/amazon/com/ionpathextraction/ExampleTest.java b/src/test/kotlin/software/amazon/com/ionpathextraction/ExampleTest.java new file mode 100644 index 0000000..67420f5 --- /dev/null +++ b/src/test/kotlin/software/amazon/com/ionpathextraction/ExampleTest.java @@ -0,0 +1,55 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at: + * + * http://aws.amazon.com/apache2.0/ + * + * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific + * language governing permissions and limitations under the License. 
+ */ + +package software.amazon.com.ionpathextraction; + +import static org.junit.Assert.assertEquals; + +import java.util.ArrayList; +import java.util.List; +import java.util.function.Function; +import org.junit.Test; +import software.amazon.ion.IonReader; +import software.amazon.ion.system.IonReaderBuilder; + +/** + * Test the example code in README.md + */ +public class ExampleTest { + + @Test + public void example() { + final List list = new ArrayList<>(); + final Function callback = (reader) -> { + list.add(reader.intValue()); + + return 0; + }; + + final PathExtractor pathExtractor = PathExtractorBuilder.standard() + .withSearchPath("(foo)", callback) + .withSearchPath("(bar)", callback) + .withSearchPath("(baz 1)", callback) + .build(); + + final IonReader ionReader = IonReaderBuilder.standard().build("{foo: 1}" + + "{bar: 2}" + + "{baz: [10,20,30,40]}" + + "{other: 99}" + ); + + pathExtractor.match(ionReader); + + assertEquals("[1, 2, 20]", list.toString()); + } +} From f96a8f34a1201864189606a4290b307143d35d5a Mon Sep 17 00:00:00 2001 From: barbosf Date: Thu, 18 Oct 2018 15:30:27 -0700 Subject: [PATCH 13/13] README.md typos --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8be8a6a..f6c65e9 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ example: ```java PathExtractorBuilder.standard() .withMatchCaseInsensitive(true) - .register("(foo)", (reader) -> { ... }) + .withSearchPath("(foo)", (reader) -> { ... }) .build() ``` @@ -49,7 +49,7 @@ see `PathExtractorBuilder` javadoc for more information on configuration options ### Notification Each time the `PathExtractor` encounters a value that matches a registered search path it will invoke the respective -callback passing the reader positioned at the current value. See `PathExtractorBuilder#register` methods for more +callback passing the reader positioned at the current value. 
See `PathExtractorBuilder#withSearchPath` methods for more information on the callback contract. ### Examples: