From f6b9f44cf31b66aa2d9854ed594ee5d2fc1a4964 Mon Sep 17 00:00:00 2001 From: Dai MIKURUBE Date: Thu, 21 Sep 2023 13:46:16 +0900 Subject: [PATCH 1/5] Parse JSON into org.embulk.spi.json.JsonValue, not org.msgpack.value.Value --- build.gradle | 10 +- gradle.lockfile | 2 +- .../embulk/parser/json/JsonParserPlugin.java | 139 +++++++++--------- 3 files changed, 78 insertions(+), 73 deletions(-) diff --git a/build.gradle b/build.gradle index de273dba..15afc4a2 100644 --- a/build.gradle +++ b/build.gradle @@ -43,14 +43,14 @@ dependencies { implementation "com.fasterxml.jackson.core:jackson-databind:2.6.7.5" implementation "org.embulk:embulk-util-config:0.3.4" implementation "org.embulk:embulk-util-file:0.1.5" - implementation "org.embulk:embulk-util-json:0.2.2" + implementation "org.embulk:embulk-util-json:0.3.0" implementation "org.embulk:embulk-util-timestamp:0.2.2" testImplementation "junit:junit:4.13.2" - testImplementation "org.embulk:embulk-spi:0.10.50" - testImplementation "org.embulk:embulk-core:0.10.50" - testImplementation "org.embulk:embulk-deps:0.10.50" - testImplementation "org.embulk:embulk-junit4:0.10.50" + testImplementation "org.embulk:embulk-spi:0.11" + testImplementation "org.embulk:embulk-core:0.11.0" + testImplementation "org.embulk:embulk-deps:0.11.0" + testImplementation "org.embulk:embulk-junit4:0.11.0" testImplementation "com.google.guava:guava:18.0" } diff --git a/gradle.lockfile b/gradle.lockfile index 6e47b858..8829b64a 100644 --- a/gradle.lockfile +++ b/gradle.lockfile @@ -9,7 +9,7 @@ javax.validation:validation-api:1.1.0.Final=compileClasspath,runtimeClasspath org.embulk:embulk-spi:0.11=compileClasspath org.embulk:embulk-util-config:0.3.4=compileClasspath,runtimeClasspath org.embulk:embulk-util-file:0.1.5=compileClasspath,runtimeClasspath -org.embulk:embulk-util-json:0.2.2=compileClasspath,runtimeClasspath +org.embulk:embulk-util-json:0.3.0=compileClasspath,runtimeClasspath org.embulk:embulk-util-rubytime:0.3.3=compileClasspath,runtimeClasspath org.embulk:embulk-util-timestamp:0.2.2=compileClasspath,runtimeClasspath org.msgpack:msgpack-core:0.8.24=compileClasspath diff --git a/src/main/java/org/embulk/parser/json/JsonParserPlugin.java b/src/main/java/org/embulk/parser/json/JsonParserPlugin.java index cc925872..e62ba7c8 100644 --- a/src/main/java/org/embulk/parser/json/JsonParserPlugin.java +++ b/src/main/java/org/embulk/parser/json/JsonParserPlugin.java @@ -16,6 +16,7 @@ package org.embulk.parser.json; +import com.fasterxml.jackson.core.JsonFactory; import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.IOException; @@ -27,6 +28,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Optional; import java.util.function.Function; import java.util.regex.Pattern; @@ -42,6 +44,8 @@ import org.embulk.spi.PageOutput; import org.embulk.spi.ParserPlugin; import org.embulk.spi.Schema; +import org.embulk.spi.json.JsonObject; +import org.embulk.spi.json.JsonValue; import org.embulk.spi.type.TimestampType; import org.embulk.spi.type.Types; import org.embulk.util.config.Config; @@ -52,20 +56,12 @@ import org.embulk.util.config.units.SchemaConfig; import org.embulk.util.file.FileInputInputStream; import org.embulk.util.json.JsonParseException; -import org.embulk.util.json.JsonParser; +import org.embulk.util.json.JsonValueParser; import org.embulk.util.timestamp.TimestampFormatter; -import org.msgpack.core.Preconditions; -import org.msgpack.value.MapValue; -import org.msgpack.value.Value; -import org.msgpack.value.ValueFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class JsonParserPlugin implements ParserPlugin { - public JsonParserPlugin() { - this.jsonParser = new JsonParser(); - } - public enum InvalidEscapeStringPolicy { PASSTHROUGH("PASSTHROUGH"), SKIP("SKIP"), @@ -167,40 +163,46 @@ public void run(TaskSource taskSource, Schema schema, FileInput input, PageOutpu timestampFormatters.putAll(newTimestampColumnFormattersAsMap(task, task.getSchemaConfig().get())); jsonPointers.putAll(createJsonPointerMap(schema, schemaConfig)); } + final JsonValueParser.Builder parserBuilder = JsonValueParser.builder(JSON_FACTORY); try (final PageBuilder pageBuilder = Exec.getPageBuilder(Exec.getBufferAllocator(), schema, output); FileInputInputStream in = new FileInputInputStream(input)) { while (in.nextFile()) { final String fileName = input.hintOfCurrentInputFileNameForLogging().orElse("-"); - boolean evenOneJsonParsed = false; - try (JsonParser.Stream stream = newJsonStream(in, task)) { - Value originalValue; - while ((originalValue = stream.next()) != null) { + try (final JsonValueParser parser = newParser(in, task, parserBuilder)) { + JsonValue originalValue; + while ((originalValue = parser.readJsonValue()) != null) { try { - Value value = originalValue; + JsonValue value = originalValue; if (task.getRoot().isPresent()) { try { - value = jsonParser.parseWithOffsetInJsonPointer(originalValue.toJson(), task.getRoot().get()); + value = JsonValueParser.builder(JSON_FACTORY) + .root(task.getRoot().get()) + .build(originalValue.toJson()) + .readJsonValue(); + if (value == null) { + throw new JsonRecordValidateException("A Json record doesn't have given 'JSON pointer to root'."); + } } catch (JsonParseException e) { throw new JsonRecordValidateException("A Json record doesn't have given 'JSON pointer to root'."); } } - final Iterable recordValues; + final Iterable recordValues; if (task.getFlattenJsonArray()) { - if (!value.isArrayValue()) { + if (!value.isJsonArray()) { throw new JsonRecordValidateException( String.format( "A Json record must represent array value with '__experimental__flatten_json_array' option, but it's %s", - value.getValueType().name())); + value.getEntityType().name())); } - recordValues = value.asArrayValue(); + recordValues = value.asJsonArray(); } else { recordValues = Collections.singletonList(value); } - for (Value recordValue : recordValues) { + for (final JsonValue recordValue : recordValues) { addRecord(task, pageBuilder, schema, timestampFormatters, jsonPointers, recordValue); evenOneJsonParsed = true; } @@ -232,21 +234,21 @@ private void addRecord( Schema schema, Map timestampFormatters, Map jsonPointers, - Value value) { - if (!value.isMapValue()) { + JsonValue value) { + if (!value.isJsonObject()) { throw new JsonRecordValidateException( - String.format("A Json record must represent map value but it's %s", value.getValueType().name())); + String.format("A Json record must represent map value but it's %s", value.getEntityType().name())); } try { if (isUsingCustomSchema(task)) { - setValueWithCustomSchema(pageBuilder, schema, timestampFormatters, jsonPointers, value.asMapValue()); + setValueWithCustomSchema(pageBuilder, schema, timestampFormatters, jsonPointers, value.asJsonObject()); } else { - setValueWithSingleJsonColumn(pageBuilder, schema, value.asMapValue()); + setValueWithSingleJsonColumn(pageBuilder, schema, value.asJsonObject()); } } catch (final DateTimeParseException ex) { throw new JsonRecordValidateException( - String.format("A Json record must have valid timestamp value but it's %s", value.getValueType().name())); + String.format("A Json record must have valid timestamp value but it's %s", value.getEntityType().name())); } pageBuilder.addRecord(); } @@ -255,7 +257,7 @@ private static boolean isUsingCustomSchema(PluginTask task) { return task.getSchemaConfig().isPresent() && !task.getSchemaConfig().get().isEmpty(); } - private static void setValueWithSingleJsonColumn(PageBuilder pageBuilder, Schema schema, MapValue value) { + private static void setValueWithSingleJsonColumn(PageBuilder pageBuilder, Schema schema, JsonObject value) { final Column column = schema.getColumn(0); // record column pageBuilder.setJson(column, value); } @@ -265,8 +267,8 @@ private void setValueWithCustomSchema( Schema schema, Map timestampFormatters, Map jsonPointers, - MapValue value) { - final Map map = value.map(); + JsonObject value) { + final Map map = value; String valueAsJsonString = null; if (!jsonPointers.isEmpty()) { // Convert to string in order to re-parse with given JSON pointer @@ -274,14 +276,21 @@ private void setValueWithCustomSchema( } for (Column column : schema.getColumns()) { final String jsonPointer = jsonPointers.get(column); - final Value columnValue; + final JsonValue columnValue; if (jsonPointer != null) { - columnValue = parseColumnValueWithOffsetInJsonPointer(valueAsJsonString, jsonPointer); + try { + columnValue = JsonValueParser.builder(JSON_FACTORY) + .root(jsonPointer) + .build(valueAsJsonString) + .readJsonValue(); + } catch (final IOException ex) { + throw new JsonParseException("Failed to parse JSON: " + valueAsJsonString, ex); + } } else { - columnValue = map.get(ValueFactory.newString(column.getName())); + columnValue = map.get(column.getName()); } - if (columnValue == null || columnValue.isNilValue()) { + if (columnValue == null || columnValue.isJsonNull()) { pageBuilder.setNull(column); continue; } @@ -290,10 +299,10 @@ private void setValueWithCustomSchema( @Override public void booleanColumn(Column column) { final boolean booleanValue; - if (columnValue.isBooleanValue()) { - booleanValue = columnValue.asBooleanValue().getBoolean(); + if (columnValue.isJsonBoolean()) { + booleanValue = columnValue.asJsonBoolean().booleanValue(); } else { - booleanValue = Boolean.parseBoolean(columnValue.toString()); + booleanValue = Boolean.parseBoolean(asString(columnValue)); } pageBuilder.setBoolean(column, booleanValue); } @@ -301,10 +310,10 @@ public void booleanColumn(Column column) { @Override public void longColumn(Column column) { final long longValue; - if (columnValue.isIntegerValue()) { - longValue = columnValue.asIntegerValue().toLong(); + if (columnValue.isJsonLong()) { + longValue = columnValue.asJsonLong().longValue(); } else { - longValue = Long.parseLong(columnValue.toString()); + longValue = Long.parseLong(asString(columnValue)); } pageBuilder.setLong(column, longValue); } @@ -312,22 +321,22 @@ public void longColumn(Column column) { @Override public void doubleColumn(Column column) { final double doubleValue; - if (columnValue.isFloatValue()) { - doubleValue = columnValue.asFloatValue().toDouble(); + if (columnValue.isJsonDouble()) { + doubleValue = columnValue.asJsonDouble().doubleValue(); } else { - doubleValue = Double.parseDouble(columnValue.toString()); + doubleValue = Double.parseDouble(asString(columnValue)); } pageBuilder.setDouble(column, doubleValue); } @Override public void stringColumn(Column column) { - pageBuilder.setString(column, columnValue.toString()); + pageBuilder.setString(column, asString(columnValue)); } @Override public void timestampColumn(Column column) { - pageBuilder.setTimestamp(column, timestampFormatters.get(column).parse(columnValue.toString())); + pageBuilder.setTimestamp(column, timestampFormatters.get(column).parse(asString(columnValue))); } @Override @@ -338,45 +347,28 @@ public void jsonColumn(Column column) { } } - private Value parseColumnValueWithOffsetInJsonPointer(String valueAsJsonString, String jsonPointer) { - try { - return jsonParser.parseWithOffsetInJsonPointer(valueAsJsonString, jsonPointer); - } catch (JsonParseException e) { - /* - * When JsonParseException is thrown, it would be an error that the given JSON pointer doesn't match with the JSON object. - * We would return NULL when the pointer doesn't match, not throw Exception. - * - * NOTE: We may change the behavior (ref: https://github.com/embulk/embulk/pull/1103#discussion_r255807991) - */ - return ValueFactory.newNil(); - } - } - - private JsonParser.Stream newJsonStream(FileInputInputStream in, PluginTask task) + private JsonValueParser newParser(final FileInputInputStream in, final PluginTask task, final JsonValueParser.Builder builder) throws IOException { final InvalidEscapeStringPolicy policy = task.getInvalidEscapeStringPolicy(); final InputStream inputStream; switch (policy) { case SKIP: case UNESCAPE: - byte[] lines = new BufferedReader(new InputStreamReader(in)) + final byte[] lines = new BufferedReader(new InputStreamReader(in)) .lines() .map(invalidEscapeStringFunction(policy)) .collect(Collectors.joining()) .getBytes(StandardCharsets.UTF_8); - inputStream = new ByteArrayInputStream(lines); - break; + return builder.build(new ByteArrayInputStream(lines)); case PASSTHROUGH: default: - inputStream = in; + return builder.build(in); } - - return jsonParser.open(inputStream); } static Function invalidEscapeStringFunction(final InvalidEscapeStringPolicy policy) { return input -> { - Preconditions.checkNotNull(input); + Objects.requireNonNull(input); if (policy == InvalidEscapeStringPolicy.PASSTHROUGH) { return input; } @@ -469,6 +461,13 @@ private static Map newTimestampColumnFormattersAsMap return Collections.unmodifiableMap(formatters); } + private static String asString(final JsonValue value) { + if (value.isJsonString()) { + return value.asJsonString().getString(); + } + return value.toString(); + } + static class JsonRecordValidateException extends DataException { JsonRecordValidateException(String message) { super(message); @@ -481,5 +480,11 @@ static class JsonRecordValidateException extends DataException { private static final Pattern DIGITS_PATTERN = Pattern.compile("\\p{XDigit}+"); - private final JsonParser jsonParser; + private static final JsonFactory JSON_FACTORY; + + static { + JSON_FACTORY = new JsonFactory(); + JSON_FACTORY.enable(com.fasterxml.jackson.core.JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS); + JSON_FACTORY.enable(com.fasterxml.jackson.core.JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS); + } } From 913478f226a27d5aa87410eb5f1eb7121c160504 Mon Sep 17 00:00:00 2001 From: Dai MIKURUBE Date: Thu, 21 Sep 2023 14:08:55 +0900 Subject: [PATCH 2/5] Test with org.embulk.spi.json.JsonValue, not org.msgpack.value.Value --- .../java/org/embulk/parser/json/Pages.java | 2 +- .../parser/json/TestJsonParserPlugin.java | 83 +++++++++---------- 2 files changed, 42 insertions(+), 43 deletions(-) diff --git a/src/test/java/org/embulk/parser/json/Pages.java b/src/test/java/org/embulk/parser/json/Pages.java index 7fc08218..7c1b0cc4 100644 --- a/src/test/java/org/embulk/parser/json/Pages.java +++ b/src/test/java/org/embulk/parser/json/Pages.java @@ -117,7 +117,7 @@ public void jsonColumn(Column column) { if (record.isNull(column)) { visit(column, null); } else { - visit(column, record.getJson(column)); + visit(column, record.getJsonValue(column)); } } } diff --git a/src/test/java/org/embulk/parser/json/TestJsonParserPlugin.java b/src/test/java/org/embulk/parser/json/TestJsonParserPlugin.java index d7f224b4..3bf94f61 100644 --- a/src/test/java/org/embulk/parser/json/TestJsonParserPlugin.java +++ b/src/test/java/org/embulk/parser/json/TestJsonParserPlugin.java @@ -23,11 +23,6 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import static org.msgpack.value.ValueFactory.newArray; -import static org.msgpack.value.ValueFactory.newBoolean; -import static org.msgpack.value.ValueFactory.newInteger; -import static org.msgpack.value.ValueFactory.newMap; -import static org.msgpack.value.ValueFactory.newString; import com.google.common.collect.ImmutableList; import java.io.ByteArrayInputStream; @@ -36,21 +31,25 @@ import java.time.Instant; import java.util.ArrayList; import java.util.List; -import java.util.Map; import org.embulk.config.ConfigSource; import org.embulk.spi.DataException; import org.embulk.spi.FileInput; import org.embulk.spi.Schema; +import org.embulk.spi.json.JsonArray; +import org.embulk.spi.json.JsonBoolean; +import org.embulk.spi.json.JsonDouble; +import org.embulk.spi.json.JsonLong; +import org.embulk.spi.json.JsonObject; +import org.embulk.spi.json.JsonString; +import org.embulk.spi.json.JsonValue; import org.embulk.test.EmbulkTestRuntime; import org.embulk.test.TestPageBuilderReader.MockPageOutput; import org.embulk.util.config.ConfigMapperFactory; import org.embulk.util.file.InputStreamFileInput; -import org.embulk.util.json.JsonParser; import org.embulk.util.timestamp.TimestampFormatter; import org.junit.Before; import org.junit.Rule; import org.junit.Test; -import org.msgpack.value.Value; public class TestJsonParserPlugin { @Rule @@ -100,31 +99,30 @@ public void readNormalJson() throws Exception { assertEquals(3, records.size()); Object[] record; - Map map; { // "{\"_c0\":true,\"_c1\":10,\"_c2\":\"embulk\",\"_c3\":{\"k\":\"v\"}}" record = records.get(0); assertEquals(1, record.length); - map = ((Value) record[0]).asMapValue().map(); + final JsonObject object = ((JsonValue) record[0]).asJsonObject(); - assertEquals(newBoolean(true), map.get(newString("_c0"))); - assertEquals(newInteger(10L), map.get(newString("_c1"))); - assertEquals(newString("embulk"), map.get(newString("_c2"))); - assertEquals(newMap(newString("k"), newString("v")), map.get(newString("_c3"))); + assertEquals(JsonBoolean.TRUE, object.get("_c0")); + assertEquals(JsonLong.of(10L), object.get("_c1")); + assertEquals(JsonString.of("embulk"), object.get("_c2")); + assertEquals(JsonObject.of("k", JsonString.of("v")), object.get("_c3")); } { // "{}" record = records.get(1); assertEquals(1, record.length); - assertTrue(((Value) record[0]).asMapValue().map().isEmpty()); + assertTrue(((JsonValue) record[0]).asJsonObject().isEmpty()); } { record = records.get(2); assertEquals(1, record.length); - map = ((Value) record[0]).asMapValue().map(); + final JsonObject object = ((JsonValue) record[0]).asJsonObject(); - assertEquals(newBoolean(false), map.get(newString("_c0"))); - assertEquals(newInteger(-10L), map.get(newString("_c1"))); - assertEquals(newString("エンバルク"), map.get(newString("_c2"))); - assertEquals(newArray(newString("e0"), newString("e1")), map.get(newString("_c3"))); + assertEquals(JsonBoolean.FALSE, object.get("_c0")); + assertEquals(JsonLong.of(-10L), object.get("_c1")); + assertEquals(JsonString.of("エンバルク"), object.get("_c2")); + assertEquals(JsonArray.of(JsonString.of("e0"), JsonString.of("e1")), object.get("_c3")); } } @@ -210,8 +208,8 @@ public void useSkipInvalidEscapeString() throws Exception { List records = Pages.toObjects(newSchema(), output.pages); assertEquals(1, records.size()); Object[] record = records.get(0); - Map map = ((Value) record[0]).asMapValue().map(); - assertEquals(newString("b"), map.get(newString(""))); + final JsonObject object = ((JsonValue) record[0]).asJsonObject(); + assertEquals(JsonString.of("b"), object.get("")); } @Test @@ -224,8 +222,8 @@ public void useUnEscapeInvalidEscapeString() throws Exception { List records = Pages.toObjects(newSchema(), output.pages); assertEquals(1, records.size()); Object[] record = records.get(0); - Map map = ((Value) record[0]).asMapValue().map(); - assertEquals(newString("b"), map.get(newString("a"))); + final JsonObject object = ((JsonValue) record[0]).asJsonObject(); + assertEquals(JsonString.of("b"), object.get("a")); } @Test @@ -331,7 +329,7 @@ public void useRoot() throws Exception { transaction(config, fileInput( "{\"_c0\":{\"b\": 1}, \"_c1\": true}", "{}", // should be skipped because it doesn't have "_c0" and stop_on_invalid_record is false - "{\"_c0\": 1}", // should be skipped because the "_c0"'s value isn't map value and stop_on_invalid_record is false + "{\"_c0\": 1}", // should be skipped because the "_c0"'s value isn't JSON Object and stop_on_invalid_record is false "{\"_c0\":{\"b\": 2}, \"_c1\": false}" )); @@ -339,12 +337,12 @@ public void useRoot() throws Exception { assertEquals(2, records.size()); Object[] record = records.get(0); - Map map = ((Value) record[0]).asMapValue().map(); - assertEquals(newInteger(1), map.get(newString("b"))); + final JsonObject object1 = ((JsonValue) record[0]).asJsonObject(); + assertEquals(JsonLong.of(1), object1.get("b")); record = records.get(1); - map = ((Value) record[0]).asMapValue().map(); - assertEquals(newInteger(2), map.get(newString("b"))); + final JsonObject object2 = ((JsonValue) record[0]).asJsonObject(); + assertEquals(JsonLong.of(2), object2.get("b")); } @Test(expected = DataException.class) @@ -381,7 +379,7 @@ public void useSchemaConfig() throws Exception { assertEquals(1, records.size()); Object[] record = records.get(0); - assertArrayEquals(record, new Object[]{1L, 1.234D, "a", true, toInstant("2019-01-02 03:04:56"), toJson("{\"a\": 1}"), null}); + assertArrayEquals(record, new Object[]{1L, 1.234D, "a", true, toInstant("2019-01-02 03:04:56"), JsonObject.of("a", JsonLong.of(1)), null}); } @Test @@ -397,7 +395,14 @@ public void useDefaultSchemaIfSchemaConfigIsEmptyArray() throws Exception { Object[] record = records.get(0); assertArrayEquals( record, - new Object[]{toJson("{\"_c0\": 1, \"_c1\": 1.234, \"_c2\": \"a\", \"_c3\": true, \"_c4\": \"2019-01-02 03:04:56\", \"_c5\":{\"a\": 1}}")}); + new Object[]{ JsonObject.of( + "_c0", JsonLong.of(1), + "_c1", JsonDouble.of(1.234), + "_c2", JsonString.of("a"), + "_c3", JsonBoolean.TRUE, + "_c4", JsonString.of("2019-01-02 03:04:56"), + "_c5", JsonObject.of("a", JsonLong.of(1))) + }); } @Test @@ -421,7 +426,7 @@ public void useSchemaConfigWithPointer() throws Exception { assertEquals(1, records.size()); Object[] record = records.get(0); - assertArrayEquals(record, new Object[]{1L, 1.234D, "foo", true, toInstant("2019-01-02 03:04:56"), toJson("{\"a\": 1}"), null}); + assertArrayEquals(record, new Object[]{1L, 1.234D, "foo", true, toInstant("2019-01-02 03:04:56"), JsonObject.of("a", JsonLong.of(1)), null}); } @Test @@ -433,8 +438,8 @@ public void useFlattenJsonArray() throws Exception { List records = Pages.toObjects(newSchema(), output.pages); assertEquals(2, records.size()); - assertArrayEquals(records.get(0), new Object[]{toJson("{\"_c0\": 1}")}); - assertArrayEquals(records.get(1), new Object[]{toJson("{\"_c0\": 2}")}); + assertArrayEquals(records.get(0), new Object[]{ JsonObject.of("_c0", JsonLong.of(1)) }); + assertArrayEquals(records.get(1), new Object[]{ JsonObject.of("_c0", JsonLong.of(2)) }); } @Test(expected = DataException.class) @@ -459,8 +464,8 @@ public void useFlattenJsonArrayWithRootPointer() throws Exception { List records = Pages.toObjects(newSchema(), output.pages); assertEquals(2, records.size()); - assertArrayEquals(records.get(0), new Object[]{toJson("{\"_c0\": 1}")}); - assertArrayEquals(records.get(1), new Object[]{toJson("{\"_c0\": 2}")}); + assertArrayEquals(records.get(0), new Object[]{ JsonObject.of("_c0", JsonLong.of(1)) }); + assertArrayEquals(records.get(1), new Object[]{ JsonObject.of("_c0", JsonLong.of(2)) }); } private ConfigSource config() { @@ -494,13 +499,7 @@ private static Instant toInstant(String dateTimeString) { return TIMESTAMP_FORMATTER.parse(dateTimeString); } - private static Value toJson(String json) { - return JSON_PARSER.parse(json); - } - private static final ConfigMapperFactory CONFIG_MAPPER_FACTORY = ConfigMapperFactory.builder().addDefaultModules().build(); private static TimestampFormatter TIMESTAMP_FORMATTER = TimestampFormatter.builderWithJava("yyyy-MM-dd HH:mm:ss").build(); - - private static final JsonParser JSON_PARSER = new JsonParser(); } From ba6d47bdd7fba2ab3675de0527bffc66e8eb4282 Mon Sep 17 00:00:00 2001 From: Dai MIKURUBE Date: Thu, 21 Sep 2023 14:12:45 +0900 Subject: [PATCH 3/5] Remove unnecessary dependencies --- build.gradle | 2 -- .../java/org/embulk/parser/json/TestJsonParserPlugin.java | 5 +++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/build.gradle b/build.gradle index 15afc4a2..03f8c6c8 100644 --- a/build.gradle +++ b/build.gradle @@ -36,7 +36,6 @@ java { dependencies { compileOnly "org.embulk:embulk-spi:0.11" compileOnly "org.slf4j:slf4j-api:2.0.7" - compileOnly "org.msgpack:msgpack-core:0.8.24" implementation "com.fasterxml.jackson.core:jackson-annotations:2.6.7" implementation "com.fasterxml.jackson.core:jackson-core:2.6.7" @@ -51,7 +50,6 @@ dependencies { testImplementation "org.embulk:embulk-core:0.11.0" testImplementation "org.embulk:embulk-deps:0.11.0" testImplementation "org.embulk:embulk-junit4:0.11.0" - testImplementation "com.google.guava:guava:18.0" } embulkPlugin { diff --git a/src/test/java/org/embulk/parser/json/TestJsonParserPlugin.java b/src/test/java/org/embulk/parser/json/TestJsonParserPlugin.java index 3bf94f61..caafeea4 100644 --- a/src/test/java/org/embulk/parser/json/TestJsonParserPlugin.java +++ b/src/test/java/org/embulk/parser/json/TestJsonParserPlugin.java @@ -24,12 +24,13 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import com.google.common.collect.ImmutableList; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.time.Instant; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; import java.util.List; import org.embulk.config.ConfigSource; import org.embulk.spi.DataException; @@ -488,7 +489,7 @@ private FileInput fileInput(String... lines) throws Exception { private InputStreamFileInput.IteratorProvider provider(InputStream... inputStreams) throws IOException { return new InputStreamFileInput.IteratorProvider( - ImmutableList.copyOf(inputStreams)); + Collections.unmodifiableList(Arrays.asList(inputStreams))); } private Schema newSchema() { From 3fa6872c8876d86bbcc9ecbf7c55b79b5fbf83d3 Mon Sep 17 00:00:00 2001 From: Dai MIKURUBE Date: Thu, 21 Sep 2023 14:34:33 +0900 Subject: [PATCH 4/5] Get a comment back to keep the intention of Exception handling --- .../org/embulk/parser/json/JsonParserPlugin.java | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/main/java/org/embulk/parser/json/JsonParserPlugin.java b/src/main/java/org/embulk/parser/json/JsonParserPlugin.java index e62ba7c8..47f5743c 100644 --- a/src/main/java/org/embulk/parser/json/JsonParserPlugin.java +++ b/src/main/java/org/embulk/parser/json/JsonParserPlugin.java @@ -185,6 +185,14 @@ public void run(TaskSource taskSource, Schema schema, FileInput input, PageOutpu throw new JsonRecordValidateException("A Json record doesn't have given 'JSON pointer to root'."); } } catch (JsonParseException e) { + /* + * When JsonParseException is thrown, it would be an error that + * the given JSON pointer doesn't match with the JSON object. + * We would return NULL when the pointer doesn't match, not throw Exception. + * + * NOTE: We may change the behavior. + * See: https://github.com/embulk/embulk/pull/1103#discussion_r255807991 + */ throw new JsonRecordValidateException("A Json record doesn't have given 'JSON pointer to root'."); } } @@ -284,6 +292,14 @@ private void setValueWithCustomSchema( .build(valueAsJsonString) .readJsonValue(); } catch (final IOException ex) { + /* + * When JsonParseException is thrown, it would be an error that + * the given JSON pointer doesn't match with the JSON object. + * We would return NULL when the pointer doesn't match, not throw Exception. + * + * NOTE: We may change the behavior. + * See: https://github.com/embulk/embulk/pull/1103#discussion_r255807991 + */ throw new JsonParseException("Failed to parse JSON: " + valueAsJsonString, ex); } } else { From b8039b48869f43c58c1a6a3c9fab947751c76252 Mon Sep 17 00:00:00 2001 From: Dai MIKURUBE Date: Mon, 25 Sep 2023 21:53:42 +0900 Subject: [PATCH 5/5] Fix an Exception message for flatten_json_array --- src/main/java/org/embulk/parser/json/JsonParserPlugin.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/embulk/parser/json/JsonParserPlugin.java b/src/main/java/org/embulk/parser/json/JsonParserPlugin.java index 47f5743c..dd8ed78e 100644 --- a/src/main/java/org/embulk/parser/json/JsonParserPlugin.java +++ b/src/main/java/org/embulk/parser/json/JsonParserPlugin.java @@ -202,7 +202,7 @@ public void run(TaskSource taskSource, Schema schema, FileInput input, PageOutpu if (!value.isJsonArray()) { throw new JsonRecordValidateException( String.format( - "A Json record must represent array value with '__experimental__flatten_json_array' option, but it's %s", + "A Json record must represent array value with 'flatten_json_array' option, but it's %s", value.getEntityType().name())); } recordValues = value.asJsonArray();