From e3e80be386de789201a83197b39d9e6a0a4046c4 Mon Sep 17 00:00:00 2001
From: zhli1142015
Date: Wed, 18 Dec 2024 17:32:25 +0800
Subject: [PATCH] address comments

---
 velox/docs/functions/spark/json.rst           | 18 +++++++--------
 .../sparksql/specialforms/FromJson.cpp        | 10 ++++-----
 .../functions/sparksql/tests/FromJsonTest.cpp | 22 ++++++++++---------
 3 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/velox/docs/functions/spark/json.rst b/velox/docs/functions/spark/json.rst
index 8a5c5b4f6b8d..9bd95a648aa7 100644
--- a/velox/docs/functions/spark/json.rst
+++ b/velox/docs/functions/spark/json.rst
@@ -14,16 +14,14 @@ JSON Functions
 
 .. spark:function:: from_json(jsonString) -> [json object]
 
-    Casting a JSON text to the function's output type returns the value
-    represented by the JSON text if it matches the output type; otherwise, NULL
-    is returned.
-    The function supports ARRAY, MAP, and ROW as output types. For primitive
-    values, supported types include BOOLEAN, TINYINT, SMALLINT, INTEGER, BIGINT,
-    REAL, DOUBLE or VARCHAR. Casting to ARRAY and MAP is supported when the
-    element type of the array or the value type of the map is one of these
-    supported types. For maps, the key type must be VARCHAR. When casting to
-    ROW, only JSON objects are supported, and the keys in the JSON object must
-    match the field names of the ROW exactly (case-sensitive).
+    Casts a JSON string to an ARRAY, MAP, or ROW type, with the output type
+    determined by the expression. Returns NULL if the input string is unparsable.
+    Supported element types include BOOLEAN, TINYINT, SMALLINT, INTEGER, BIGINT,
+    REAL, DOUBLE, VARCHAR, ARRAY, MAP, and ROW. When casting to ARRAY or MAP,
+    the element type of the array or the value type of the map must be one of
+    these supported types, and for maps, the key type must be VARCHAR. Casting
+    to ROW supports only JSON objects, where the keys must exactly match the ROW
+    field names (case-sensitive). Cast behavior is shown in the examples below. ::
 
         SELECT from_json('{"a": true}'); -- {'a'=true} // Output type: ROW({"a"}, {BOOLEAN()})

diff --git a/velox/functions/sparksql/specialforms/FromJson.cpp b/velox/functions/sparksql/specialforms/FromJson.cpp
index d6549ff643aa..5b94126a9f12 100644
--- a/velox/functions/sparksql/specialforms/FromJson.cpp
+++ b/velox/functions/sparksql/specialforms/FromJson.cpp
@@ -30,7 +30,7 @@ using namespace facebook::velox::exec;
 namespace facebook::velox::functions::sparksql {
 namespace {
 
-/// Struct for extracting JSON data and writing it with type-specific handling.
+// Struct for extracting JSON data and writing it with type-specific handling.
 template <TypeKind kind>
 struct ExtractJsonTypeImpl {
   template <typename Input>
@@ -78,9 +78,9 @@ struct ExtractJsonTypeImpl {
   static simdjson::error_code
   apply(Input value, exec::GenericWriter& writer, bool /*isRoot*/) {
     SIMDJSON_ASSIGN_OR_RAISE(auto type, value.type());
-    auto& w = writer.castTo<bool>();
     switch (type) {
       case simdjson::ondemand::json_type::boolean: {
+        auto& w = writer.castTo<bool>();
         SIMDJSON_ASSIGN_OR_RAISE(w, value.get_bool());
         break;
       }
@@ -296,7 +296,7 @@ struct ExtractJsonTypeImpl {
     return simdjson::SUCCESS;
   }
 
-  // Casts a JSON value to a float point, handling both numeric Special cases
+  // Casts a JSON value to a floating point, handling both numeric special cases
   // for NaN and Infinity.
   template <typename T>
   static simdjson::error_code castJsonToFloatingPoint(
@@ -352,8 +352,8 @@ struct ExtractJsonTypeImpl {
 /// - Boolean: Only `true` and `false` are valid; others return `NULL`.
 /// - Integral Types: Accepts only integers; floats or strings return `NULL`.
 /// - Float/Double: All numbers are valid; strings like `"NaN"`, `"+INF"`,
-///   `"+Infinity"`, `"Infinity"`, `"-INF"`,
-///   `"-Infinity"` are accepted, others return `NULL`.
+///   `"+Infinity"`, `"Infinity"`, `"-INF"`, `"-Infinity"` are accepted, others
+///   return `NULL`.
 /// - Array: Accepts JSON objects only if the array is the root type with ROW
 ///   child type.
 /// - Map: Keys must be `VARCHAR` type.
diff --git a/velox/functions/sparksql/tests/FromJsonTest.cpp b/velox/functions/sparksql/tests/FromJsonTest.cpp
index 5e72ce039946..e7ee4b0b0d40 100644
--- a/velox/functions/sparksql/tests/FromJsonTest.cpp
+++ b/velox/functions/sparksql/tests/FromJsonTest.cpp
@@ -53,16 +53,6 @@ TEST_F(FromJsonTest, basicArray) {
   auto expected = makeArrayVector<int64_t>({{1}, {2}, {}});
   auto input = makeFlatVector<StringView>({R"([1])", R"([2])", R"([])"});
   testFromJson(input, expected);
-
-  auto rowVector = makeRowVector({"a"}, {makeFlatVector<int64_t>({1, 2, 2})});
-  std::vector<vector_size_t> offsets;
-  offsets.push_back(0);
-  offsets.push_back(1);
-  offsets.push_back(2);
-  auto arrayVector = makeArrayVector(offsets, rowVector);
-  input = makeFlatVector<StringView>(
-      {R"({"a": 1})", R"([{"a": 2}])", R"([{"a": 2}])"});
-  testFromJson(input, arrayVector);
 }
 
 TEST_F(FromJsonTest, basicMap) {
@@ -184,6 +174,18 @@ TEST_F(FromJsonTest, basicString) {
   testFromJson(input, makeRowVector({"a"}, {expected}));
 }
 
+TEST_F(FromJsonTest, nestedComplexType) {
+  auto rowVector = makeRowVector({"a"}, {makeFlatVector<int64_t>({1, 2, 2})});
+  std::vector<vector_size_t> offsets;
+  offsets.push_back(0);
+  offsets.push_back(1);
+  offsets.push_back(2);
+  auto arrayVector = makeArrayVector(offsets, rowVector);
+  auto input = makeFlatVector<StringView>(
+      {R"({"a": 1})", R"([{"a": 2}])", R"([{"a": 2}])"});
+  testFromJson(input, arrayVector);
+}
+
 TEST_F(FromJsonTest, keyCaseSensitive) {
   auto expected1 = makeNullableFlatVector<int64_t>({1, 2, 4});
   auto expected2 = makeNullableFlatVector<int64_t>({3, 4, 5});