diff --git a/velox/docs/functions/spark/json.rst b/velox/docs/functions/spark/json.rst index aba634cb5f94..5ae258e6668e 100644 --- a/velox/docs/functions/spark/json.rst +++ b/velox/docs/functions/spark/json.rst @@ -14,15 +14,17 @@ JSON Functions .. spark:function:: from_json(jsonString) -> [json object] - Casting a JSON text to a supported type returns the value represented by this - JSON text. The JSON text must represent a valid value of the type it is casted - to, or null will be returned. Casting to ARRAY and MAP is supported when the - element type of the array is one of the supported types, or when the key type of - the map is VARCHAR and value type of the map is one of the supported types. When - casting from JSON to ROW, only JSON object are supported. Cast from JSON object - to ROW uses case sensitive match for the JSON keys. + Casting a JSON text to a supported type returns the value represented by + the JSON text if it matches the target type; otherwise, NULL is returned. + The function supports ARRAY, MAP, and ROW as root types. For primitive + values, supported types include BOOLEAN, TINYINT, SMALLINT, INTEGER, BIGINT, + REAL, DOUBLE, and VARCHAR. Casting to ARRAY and MAP is supported when the + element type of the array or the value type of the map is one of these + supported types. For maps, the key type must be VARCHAR. When casting to + ROW, only JSON objects are supported, and the keys in the JSON object must + match the field names of the ROW exactly (case-sensitive). 
Behaviors of the casts are shown with the examples below::: - SELECT from_json('{"a": 1}', 'ROW(a INT)'); -- {a=1} - SELECT from_json('["name", "age", "id"]', 'array'); -- ['name', 'age', 'id'] - SELECT from_json('{"a": 1, "b": 2}', 'map'); -- {a=1, b=2} + SELECT from_json('{"a": 1}'); -- {'a'=1} // Output type: ROW(a INTEGER) + SELECT from_json('["name", "age", "id"]'); -- ['name', 'age', 'id'] // Output type: ARRAY(VARCHAR) + SELECT from_json('{"a": 1, "b": 2}'); -- {'a'=1, 'b'=2} // Output type: MAP(VARCHAR,INTEGER) diff --git a/velox/functions/sparksql/specialforms/FromJson.cpp b/velox/functions/sparksql/specialforms/FromJson.cpp index 4c2ab9a037bc..ce92dfe94891 100644 --- a/velox/functions/sparksql/specialforms/FromJson.cpp +++ b/velox/functions/sparksql/specialforms/FromJson.cpp @@ -418,7 +418,7 @@ class FromJsonFunction final : public exec::VectorFunction { break; } default: - VELOX_UNSUPPORTED("INVALID_JSON_SCHEMA"); + VELOX_UNSUPPORTED("Unsupported type {}.", result->type()->toString()); } } @@ -518,12 +518,12 @@ class FromJsonFunction final : public exec::VectorFunction { mutable std::string paddedInput_; }; -bool isSupportedType(const TypePtr& other, bool isRootType = true) { - switch (other->kind()) { +bool isSupportedType(const TypePtr& type, bool isRootType) { + switch (type->kind()) { case TypeKind::ARRAY: - return isSupportedType(other->childAt(0), false); + return isSupportedType(type->childAt(0), false); case TypeKind::ROW: - for (const auto& child : other->as().children()) { + for (const auto& child : type->as().children()) { if (!isSupportedType(child, false)) { return false; } @@ -531,19 +531,26 @@ bool isSupportedType(const TypePtr& other, bool isRootType = true) { return true; case TypeKind::MAP: return ( - other->childAt(0)->kind() == TypeKind::VARCHAR && - isSupportedType(other->childAt(1), false)); + type->childAt(0)->kind() == TypeKind::VARCHAR && + isSupportedType(type->childAt(1), false)); + case TypeKind::BIGINT: { + if 
(type->isDecimal()) { + return false; + } + return !isRootType; + } + case TypeKind::INTEGER: { + if (type->isDate()) { + return false; + } + return !isRootType; + } case TypeKind::BOOLEAN: - case TypeKind::BIGINT: - case TypeKind::INTEGER: case TypeKind::SMALLINT: case TypeKind::TINYINT: case TypeKind::DOUBLE: case TypeKind::REAL: case TypeKind::VARCHAR: { - if (other->isDate() || other->isDecimal()) { - return false; - } return !isRootType; } default: @@ -569,7 +576,7 @@ exec::ExprPtr FromJsonCallToSpecialForm::constructSpecialForm( TypeKind::VARCHAR, "The first argument of from_json should be of varchar type."); - if (!isSupportedType(type)) { + if (!isSupportedType(type, true)) { VELOX_UNSUPPORTED("Unsupported type {}.", type->toString()); }