Skip to content

Commit

Permalink
Implement flattening JSON Arrays
Browse files Browse the repository at this point in the history
  • Loading branch information
dmikurube committed Aug 30, 2023
1 parent 8eba18b commit 8ed9e36
Show file tree
Hide file tree
Showing 4 changed files with 239 additions and 6 deletions.
51 changes: 51 additions & 0 deletions src/main/java/org/embulk/util/json/FlattenJsonArrayFilter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright 2023 The Embulk project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.embulk.util.json;

import com.fasterxml.jackson.core.filter.TokenFilter;

/**
 * A {@link TokenFilter} that strips a fixed number of enclosing JSON Array levels.
 *
 * <p>Each array level that is entered consumes one unit of {@code depth}. Once the last unit is
 * reached, the elements found there are passed through in full. Object properties are never
 * matched by this filter.
 */
class FlattenJsonArrayFilter extends TokenFilter {
    /** Number of array levels that this filter still has to strip; always 1 or more. */
    private final int depth;

    /**
     * Creates a filter that strips {@code depth} levels of arrays.
     *
     * @param depth the number of array levels to strip, 1 or more
     * @throws IllegalArgumentException if {@code depth} is less than 1
     */
    FlattenJsonArrayFilter(final int depth) {
        if (depth < 1) {
            throw new IllegalArgumentException("FlattenJsonArrayFilter must receive at least 1 as depth.");
        }
        this.depth = depth;
    }

    /**
     * Chooses the filter applied to an element of the array currently being visited.
     *
     * @param index the index of the element; not used by this filter
     * @return a filter with one level less to strip while more than one level remains,
     *     otherwise {@link TokenFilter#INCLUDE_ALL}
     */
    @Override
    public TokenFilter includeElement(final int index) {
        return this.depth > 1
                ? new FlattenJsonArrayFilter(this.depth - 1)
                : TokenFilter.INCLUDE_ALL;
    }

    /**
     * Always answers {@code null}, regardless of the property name.
     *
     * @param name the property name; not used by this filter
     * @return always {@code null}
     */
    @Override
    public TokenFilter includeProperty(final String name) {
        return null;
    }

    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder("[FlattenJsonArrayFilter depth: ");
        text.append(this.depth);
        text.append("]");
        return text.toString();
    }
}
37 changes: 32 additions & 5 deletions src/main/java/org/embulk/util/json/JsonValueParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,15 @@
public final class JsonValueParser implements Closeable {
private JsonValueParser(
final com.fasterxml.jackson.core.JsonParser jacksonParser,
final int depthToFlattenJsonArrays,
final boolean hasLiteralsWithNumbers,
final boolean hasFallbacksForUnparsableNumbers,
final double defaultDouble,
final long defaultLong) {
this.jacksonParser = Objects.requireNonNull(jacksonParser);
this.valueReader = new InternalJsonValueReader(
hasLiteralsWithNumbers, hasFallbacksForUnparsableNumbers, defaultDouble, defaultLong);
this.depthToFlattenJsonArrays = depthToFlattenJsonArrays;
this.hasLiteralsWithNumbers = hasLiteralsWithNumbers;
this.hasFallbacksForUnparsableNumbers = hasFallbacksForUnparsableNumbers;
this.defaultDouble = defaultDouble;
Expand All @@ -54,6 +56,7 @@ public static final class Builder {
Builder(final JsonFactory factory) {
this.factory = Objects.requireNonNull(factory);
this.root = null;
this.depthToFlattenJsonArrays = 0;
this.hasLiteralsWithNumbers = false;
this.hasFallbacksForUnparsableNumbers = false;
this.defaultDouble = 0.0;
Expand Down Expand Up @@ -84,6 +87,17 @@ public Builder root(final String root) {
return this;
}

/**
* Sets the depth to flatten JSON Arrays to parse.
*
* <p>A new builder starts with a depth of {@code 0}. The value is stored as given, without range checking.
* The array-flattening filter is added to the built parser only when the depth is greater than {@code 0},
* so {@code 0} or a negative value leaves JSON Arrays untouched.
*
* @param depthToFlattenJsonArrays the depth to flatten JSON Arrays
* @return this builder
*/
public Builder setDepthToFlattenJsonArrays(final int depthToFlattenJsonArrays) {
this.depthToFlattenJsonArrays = depthToFlattenJsonArrays;
return this;
}

/**
* Enables creating {@link JsonDouble} and {@link JsonLong} instances with supplemental literal strings.
*
Expand Down Expand Up @@ -126,6 +140,7 @@ public Builder fallbackForUnparsableNumbers(final double defaultDouble, final lo
public JsonValueParser build(final String json) throws IOException {
return new JsonValueParser(
buildJacksonParser(json),
this.depthToFlattenJsonArrays,
this.hasLiteralsWithNumbers,
this.hasFallbacksForUnparsableNumbers,
this.defaultDouble,
Expand All @@ -141,6 +156,7 @@ public JsonValueParser build(final String json) throws IOException {
public JsonValueParser build(final InputStream jsonStream) throws IOException {
return new JsonValueParser(
buildJacksonParser(jsonStream),
this.depthToFlattenJsonArrays,
this.hasLiteralsWithNumbers,
this.hasFallbacksForUnparsableNumbers,
this.defaultDouble,
Expand All @@ -156,20 +172,30 @@ private com.fasterxml.jackson.core.JsonParser buildJacksonParser(final InputStre
}

private com.fasterxml.jackson.core.JsonParser extendJacksonParser(final com.fasterxml.jackson.core.JsonParser baseParser) {
if (this.root == null) {
return baseParser;
}
return new FilteringParserDelegate(
baseParser,
com.fasterxml.jackson.core.JsonParser parser = baseParser;
if (this.root != null) {
parser = new FilteringParserDelegate(
parser,
new JsonPointerBasedFilter(this.root),
false, // TODO: Use com.fasterxml.jackson.core.filter.TokenFilter.Inclusion since Jackson 2.12.
true // Allow multiple matches
);
}
if (this.depthToFlattenJsonArrays > 0) {
parser = new FilteringParserDelegate(
parser,
new FlattenJsonArrayFilter(this.depthToFlattenJsonArrays),
false, // TODO: Use com.fasterxml.jackson.core.filter.TokenFilter.Inclusion since Jackson 2.12.
true // Allow multiple matches
);
}
return parser;
}

private final JsonFactory factory;

private JsonPointer root;
private int depthToFlattenJsonArrays;
private boolean hasLiteralsWithNumbers;
private boolean hasFallbacksForUnparsableNumbers;
private double defaultDouble;
Expand Down Expand Up @@ -243,6 +269,7 @@ public final void close() throws IOException {
private final com.fasterxml.jackson.core.JsonParser jacksonParser;
private final InternalJsonValueReader valueReader;

private final int depthToFlattenJsonArrays;
private final boolean hasLiteralsWithNumbers;
private final boolean hasFallbacksForUnparsableNumbers;
private final double defaultDouble;
Expand Down
94 changes: 94 additions & 0 deletions src/test/java/org/embulk/util/json/TestFlattenJsonArrayFilter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/*
* Copyright 2023 The Embulk project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.embulk.util.json;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.core.filter.FilteringParserDelegate;
import java.io.IOException;
import org.junit.jupiter.api.Test;

/**
 * Tests {@link FlattenJsonArrayFilter} by reading tokens through a Jackson {@link FilteringParserDelegate}.
 *
 * <p>Every parser is opened in a try-with-resources statement so that it is closed even when an
 * assertion fails.
 */
public class TestFlattenJsonArrayFilter {
    /** Depth 1 on a single array: only the enclosed object's tokens come out. */
    @Test
    public void testSimple() throws IOException {
        try (final com.fasterxml.jackson.core.JsonParser parser = createFilteredParser("[{\"foo\":\"bar\"}]", 1)) {
            assertFooBarObject(parser);
            assertNull(parser.nextToken());
        }
    }

    /** Depth 1 on a doubly nested array: the inner array is kept as-is. */
    @Test
    public void testDouble() throws IOException {
        try (final com.fasterxml.jackson.core.JsonParser parser = createFilteredParser("[[{\"foo\":\"bar\"}]]", 1)) {
            assertEquals(JsonToken.START_ARRAY, parser.nextToken());
            assertFooBarObject(parser);
            assertEquals(JsonToken.END_ARRAY, parser.nextToken());
            assertNull(parser.nextToken());
        }
    }

    /** Depth 2 on a doubly nested array: both array levels are stripped. */
    @Test
    public void testDouble2() throws IOException {
        try (final com.fasterxml.jackson.core.JsonParser parser = createFilteredParser("[[{\"foo\":\"bar\"}]]", 2)) {
            assertFooBarObject(parser);
            assertNull(parser.nextToken());
        }
    }

    /** A top-level object (no array at all) produces no tokens. */
    @Test
    public void testNoArray() throws IOException {
        try (final com.fasterxml.jackson.core.JsonParser parser = createFilteredParser("{\"foo\":\"bar\"}", 1)) {
            assertNull(parser.nextToken());
        }
    }

    /** Depth 0 is rejected by the constructor. */
    @Test
    public void test0() throws IOException {
        assertThrows(IllegalArgumentException.class, () -> {
            new FlattenJsonArrayFilter(0);
        });
    }

    /**
     * Asserts that the next tokens read from {@code parser} are exactly the object {@code {"foo":"bar"}}.
     *
     * @param parser the parser positioned just before the object's start token
     * @throws IOException if the parser fails to read
     */
    private static void assertFooBarObject(final com.fasterxml.jackson.core.JsonParser parser) throws IOException {
        assertEquals(JsonToken.START_OBJECT, parser.nextToken());
        assertEquals(JsonToken.FIELD_NAME, parser.nextToken());
        assertEquals("foo", parser.getValueAsString());
        assertEquals(JsonToken.VALUE_STRING, parser.nextToken());
        assertEquals("bar", parser.getValueAsString());
        assertEquals(JsonToken.END_OBJECT, parser.nextToken());
    }

    /**
     * Creates a parser over {@code json} wrapped with a {@link FlattenJsonArrayFilter} of the given depth.
     *
     * @param json the JSON text to parse
     * @param depth the depth passed to {@link FlattenJsonArrayFilter}
     * @return the filtering parser; the caller is responsible for closing it
     * @throws IOException if the underlying parser cannot be created
     */
    private static com.fasterxml.jackson.core.JsonParser createFilteredParser(
            final String json,
            final int depth) throws IOException {
        final JsonFactory factory = new JsonFactory();
        factory.enable(com.fasterxml.jackson.core.JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS);
        factory.enable(com.fasterxml.jackson.core.JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS);
        return new FilteringParserDelegate(
                factory.createParser(json),
                new FlattenJsonArrayFilter(depth),
                false,  // TODO: Use com.fasterxml.jackson.core.filter.TokenFilter.Inclusion since Jackson 2.12.
                true  // Allow multiple matches
        );
    }
}
63 changes: 62 additions & 1 deletion src/test/java/org/embulk/util/json/TestJsonValueParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,27 @@ public void testParseMultipleJsonsWithPointer() throws Exception {
assertNull(parser.readJsonValue());
}

@Test
public void testFlattenJsonArray() throws Exception {
    // One array level wrapping two objects; with depth 1 each object is read as its own value.
    final String twoObjectsInArray = "[{\"a\": {\"b\": 1}},{\"a\": {\"b\": 2}}]";
    final JsonObject expectedFirst = JsonObject.of("a", JsonObject.of("b", JsonLong.of(1)));
    final JsonObject expectedSecond = JsonObject.of("a", JsonObject.of("b", JsonLong.of(2)));

    final JsonValueParser flattening =
            JsonValueParser.builder().setDepthToFlattenJsonArrays(1).build(twoObjectsInArray);

    assertEquals(expectedFirst, flattening.readJsonValue());
    assertEquals(expectedSecond, flattening.readJsonValue());
    // Nothing is left after the two elements.
    assertNull(flattening.readJsonValue());
}

@Test
public void testRootWithFlattenJsonArray() throws Exception {
    // The array sits under "/f"; the root pointer selects it, then depth 1 flattens it.
    final String arrayUnderF = "{\"f\":[{\"a\": {\"b\": 1}},{\"a\": {\"b\": 2}}]}";
    final JsonObject expectedFirst = JsonObject.of("a", JsonObject.of("b", JsonLong.of(1)));
    final JsonObject expectedSecond = JsonObject.of("a", JsonObject.of("b", JsonLong.of(2)));

    final JsonValueParser rootedAndFlattening =
            JsonValueParser.builder().root("/f").setDepthToFlattenJsonArrays(1).build(arrayUnderF);

    assertEquals(expectedFirst, rootedAndFlattening.readJsonValue());
    assertEquals(expectedSecond, rootedAndFlattening.readJsonValue());
    // Nothing is left after the two elements.
    assertNull(rootedAndFlattening.readJsonValue());
}

@Test
public void testCaptureJsonPointers() throws Exception {
final JsonValueParser parser = JsonValueParser.builder().build(
Expand Down Expand Up @@ -202,9 +223,49 @@ public void testCaptureMixed() throws Exception {
}

@Test
public void testCaptureRoot() throws Exception {
public void testCaptureRootPointer() throws Exception {
final JsonValueParser parser = JsonValueParser.builder().build(
"{\"foo\":12,\"bar\":[true,false],\"baz\":null,\"qux\":{\"hoge\":\"fuga\"}}");
final CapturingPointers pointers = CapturingPointers.builder().build(); // No pointers -- root.
final JsonValue[] values = parser.captureJsonValues(pointers);
assertEquals(1, values.length);
assertEquals(
JsonObject.of(
"foo", JsonLong.of(12L),
"bar", JsonArray.of(JsonBoolean.TRUE, JsonBoolean.FALSE),
"baz", JsonNull.NULL,
"qux", JsonObject.of("hoge", JsonString.of("fuga"))),
values[0]);

// Confirming that JsonValueParser reaches at the end as expected.

assertNull(parser.captureJsonValues(pointers));
}

@Test
public void testCaptureWithRoot() throws Exception {
    final String wrappedInEx =
            "{\"ex\":{\"foo\":12,\"bar\":[true,false],\"baz\":null,\"qux\":{\"hoge\":\"fuga\"}}}";
    final JsonObject expectedUnderEx = JsonObject.of(
            "foo", JsonLong.of(12L),
            "bar", JsonArray.of(JsonBoolean.TRUE, JsonBoolean.FALSE),
            "baz", JsonNull.NULL,
            "qux", JsonObject.of("hoge", JsonString.of("fuga")));

    final JsonValueParser rooted = JsonValueParser.builder().root("/ex").build(wrappedInEx);
    // Built without any pointer, as in the original test.
    final CapturingPointers noPointers = CapturingPointers.builder().build();

    final JsonValue[] captured = rooted.captureJsonValues(noPointers);
    assertEquals(1, captured.length);
    assertEquals(expectedUnderEx, captured[0]);

    // Confirming that JsonValueParser has reached the end as expected.
    assertNull(rooted.captureJsonValues(noPointers));
}

@Test
public void testCaptureWithFlattenJsonArray() throws Exception {
final JsonValueParser parser = JsonValueParser.builder().setDepthToFlattenJsonArrays(1).build(
"[{\"foo\":12,\"bar\":[true,false],\"baz\":null,\"qux\":{\"hoge\":\"fuga\"}}]");
final CapturingPointers pointers = CapturingPointers.builder().build();
final JsonValue[] values = parser.captureJsonValues(pointers);
assertEquals(1, values.length);
Expand Down

0 comments on commit 8ed9e36

Please sign in to comment.