diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java b/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java index 3ccecfdb5..c6df8b13a 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java @@ -78,7 +78,8 @@ class DataValidationUtil { private static final ObjectMapper objectMapper = new ObjectMapper(); - private static final JsonFactory factory = new JsonFactory(); + private static final JsonFactory factory = + new JsonFactory().configure(JsonGenerator.Feature.STRICT_DUPLICATE_DETECTION, true); // The version of Jackson we are using does not support serialization of date objects from the // java.time package. Here we define a module with custom java.time serializers. Additionally, we @@ -176,7 +177,16 @@ private static String validateAndParseSemiStructured( throw valueFormatNotAllowedException( columnName, snowflakeType, "Not a valid JSON", insertRowIndex); } catch (IOException e) { - throw new SFException(e, ErrorCode.IO_ERROR, "Cannot create JSON Parser or JSON generator"); + if (e.getMessage() != null && e.getMessage().contains("Duplicate field")) { /* getMessage() may be null; guard before matching the duplicate-key text */ + throw valueFormatNotAllowedException( + columnName, snowflakeType, "Not a valid JSON: duplicate field", insertRowIndex); + } + throw new SFException( + e, + ErrorCode.IO_ERROR, + String.format( + "Cannot create JSON Parser or JSON generator for column %s of type %s, rowIndex:%d", + columnName, snowflakeType, insertRowIndex)); } // We return the minified string from the result writer return resultWriter.toString(); diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java b/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java index 4d0c51596..6e6f9afc9 100644 --- a/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java +++ 
b/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java @@ -856,6 +856,35 @@ public void testValidateAndParseObject() throws Exception { () -> validateAndParseObjectNew("COL", Collections.singletonMap("foo", new Object()), 0)); } + /** Checks that JSON text containing a repeated key inside one object (top-level, nested, or inside an array element) fails validation with INVALID_VALUE_ROW. */ @Test + public void testValidateDuplicateKeys() { + // a flat JSON object with a repeated key is rejected with INVALID_VALUE_ROW + expectError( + ErrorCode.INVALID_VALUE_ROW, + () -> validateAndParseObjectNew("COL", "{\"key\":1, \"key\":2}", 0)); + expectError( + ErrorCode.INVALID_VALUE_ROW, + () -> validateAndParseVariantNew("COL", "{\"key\":1, \"key\":2}", 0)); + + // a repeated key inside a nested object is rejected with INVALID_VALUE_ROW + expectError( + ErrorCode.INVALID_VALUE_ROW, + () -> + validateAndParseObjectNew("COL", "{\"key\":1, \"nested\":{\"key\":2, \"key\":3}}", 0)); + expectError( + ErrorCode.INVALID_VALUE_ROW, + () -> + validateAndParseVariantNew("COL", "{\"key\":1, \"nested\":{\"key\":2, \"key\":3}}", 0)); + + // a repeated key inside an object that is an array element is rejected with INVALID_VALUE_ROW + expectError( + ErrorCode.INVALID_VALUE_ROW, + () -> validateAndParseArrayNew("COL", "[{\"key\":1, \"key\":2}]", 0)); + expectError( + ErrorCode.INVALID_VALUE_ROW, + () -> validateAndParseVariantNew("COL", "[{\"key\":1, \"key\":2}]", 0)); + } + @Test public void testTooLargeVariant() { char[] stringContent = new char[16 * 1024 * 1024 - 16]; // {"a":"11","b":""}