From 8eed776a2f33080ef53f15cbbc2e00b3d8261715 Mon Sep 17 00:00:00 2001 From: Gibbs Geng Date: Tue, 14 Jan 2025 23:40:56 +0000 Subject: [PATCH 1/2] error out for duplicate keys --- .../internal/DataValidationUtil.java | 7 +++++- .../internal/DataValidationUtilTest.java | 23 +++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java b/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java index 3ccecfdb5..889bf3f92 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java @@ -78,7 +78,8 @@ class DataValidationUtil { private static final ObjectMapper objectMapper = new ObjectMapper(); - private static final JsonFactory factory = new JsonFactory(); + private static final JsonFactory factory = + new JsonFactory().configure(JsonGenerator.Feature.STRICT_DUPLICATE_DETECTION, true); // The version of Jackson we are using does not support serialization of date objects from the // java.time package. Here we define a module with custom java.time serializers. Additionally, we @@ -176,6 +177,10 @@ private static String validateAndParseSemiStructured( throw valueFormatNotAllowedException( columnName, snowflakeType, "Not a valid JSON", insertRowIndex); } catch (IOException e) { + if (e.getMessage().contains("Duplicate field")) { + throw valueFormatNotAllowedException( + columnName, snowflakeType, "Not a valid JSON: duplicate field", insertRowIndex); + } throw new SFException(e, ErrorCode.IO_ERROR, "Cannot create JSON Parser or JSON generator"); } // We return the minified string from the result writer diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java b/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java index 4d0c51596..2b3d7e718 100644 --- a/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java +++ b/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java @@ -856,6 +856,29 @@ public void testValidateAndParseObject() throws Exception { () -> validateAndParseObjectNew("COL", Collections.singletonMap("foo", new Object()), 0)); } + @Test + public void testValidateDuplicateKeys() { + // simple JSON object with duplicate keys can not be ingested + expectError( + ErrorCode.INVALID_VALUE_ROW, + () -> validateAndParseObjectNew("COL", "{\"key\":1, \"key\":2}", 0)); + + expectError( + ErrorCode.INVALID_VALUE_ROW, + () -> validateAndParseObjectNew("COL", "{\"key\":1, \"key\":2}".getBytes(), 0)); + + // nested JSON object with duplicate keys can not be ingested + expectError( + ErrorCode.INVALID_VALUE_ROW, + () -> + validateAndParseObjectNew("COL", "{\"key\":1, \"nested\":{\"key\":2, \"key\":3}}", 0)); + expectError( + ErrorCode.INVALID_VALUE_ROW, + () -> + validateAndParseObjectNew( + "COL", "{\"key\":1, \"nested\":{\"key\":2, \"key\":3}}".getBytes(), 0)); + } + @Test public void testTooLargeVariant() { char[] stringContent = new char[16 * 1024 * 1024 - 16]; // {"a":"11","b":""} From ab63dfae0267a5c7a6b9bb00cc8ff0bccad43691 Mon Sep 17 00:00:00 2001 From: Gibbs Geng Date: Wed, 15 Jan 2025 23:00:39 +0000 Subject: [PATCH 2/2] add more tests and error log --- .../streaming/internal/DataValidationUtil.java | 7 ++++++- .../streaming/internal/DataValidationUtilTest.java | 14 ++++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java b/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java index 889bf3f92..c6df8b13a 100644 --- a/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java +++ b/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java @@ -181,7 +181,12 @@ private static String validateAndParseSemiStructured( throw valueFormatNotAllowedException( columnName, snowflakeType, "Not a valid JSON: duplicate field", insertRowIndex); } - throw new SFException(e, ErrorCode.IO_ERROR, "Cannot create JSON Parser or JSON generator"); + throw new SFException( + e, + ErrorCode.IO_ERROR, + String.format( + "Cannot create JSON Parser or JSON generator for column %s of type %s, rowIndex:%d", + columnName, snowflakeType, insertRowIndex)); } // We return the minified string from the result writer return resultWriter.toString(); diff --git a/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java b/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java index 2b3d7e718..6e6f9afc9 100644 --- a/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java +++ b/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java @@ -862,10 +862,9 @@ public void testValidateDuplicateKeys() { expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObjectNew("COL", "{\"key\":1, \"key\":2}", 0)); - expectError( ErrorCode.INVALID_VALUE_ROW, - () -> validateAndParseObjectNew("COL", "{\"key\":1, \"key\":2}".getBytes(), 0)); + () -> validateAndParseVariantNew("COL", "{\"key\":1, \"key\":2}", 0)); // nested JSON object with duplicate keys can not be ingested expectError( @@ -875,8 +874,15 @@ public void testValidateDuplicateKeys() { expectError( ErrorCode.INVALID_VALUE_ROW, () -> - validateAndParseObjectNew( - "COL", "{\"key\":1, \"nested\":{\"key\":2, \"key\":3}}".getBytes(), 0)); + validateAndParseVariantNew("COL", "{\"key\":1, \"nested\":{\"key\":2, \"key\":3}}", 0)); + + // array of objects with duplicate keys can not be ingested + expectError( + ErrorCode.INVALID_VALUE_ROW, + () -> validateAndParseArrayNew("COL", "[{\"key\":1, \"key\":2}]", 0)); + expectError( + ErrorCode.INVALID_VALUE_ROW, + () -> validateAndParseVariantNew("COL", "[{\"key\":1, \"key\":2}]", 0)); } @Test