diff --git a/velox/docs/functions/spark/json.rst b/velox/docs/functions/spark/json.rst new file mode 100644 index 000000000000..800487398688 --- /dev/null +++ b/velox/docs/functions/spark/json.rst @@ -0,0 +1,13 @@ +============== +JSON Functions +============== + +.. spark:function:: json_object_keys(jsonString) -> array(string) + + Returns all the keys of the outermost JSON object as an array if a valid JSON object is given. Returns NULL if the input is any other valid JSON value, an invalid JSON string, or an empty string. :: + + SELECT json_object_keys('{}'); -- [] + SELECT json_object_keys('{"name": "Alice", "age": 5, "id": "001"}'); -- ['name', 'age', 'id'] + SELECT json_object_keys(''); -- NULL + SELECT json_object_keys('1'); -- NULL + SELECT json_object_keys('"hello"'); -- NULL diff --git a/velox/docs/spark_functions.rst b/velox/docs/spark_functions.rst index 5d8e58ab203b..8e1bb0768bc9 100644 --- a/velox/docs/spark_functions.rst +++ b/velox/docs/spark_functions.rst @@ -20,6 +20,7 @@ Spark Functions functions/spark/window functions/spark/conversion functions/spark/url + functions/spark/json Here is a list of all scalar and aggregate Spark functions available in Velox. Function names link to function descriptions. 
Check out coverage maps diff --git a/velox/functions/sparksql/CMakeLists.txt b/velox/functions/sparksql/CMakeLists.txt index 600967a14bc9..40d2519c5427 100644 --- a/velox/functions/sparksql/CMakeLists.txt +++ b/velox/functions/sparksql/CMakeLists.txt @@ -50,7 +50,8 @@ velox_link_libraries( velox_functions_spark_specialforms velox_is_null_functions velox_functions_util - Folly::folly) + Folly::folly + simdjson::simdjson) if(NOT VELOX_MONO_LIBRARY) set_property(TARGET velox_functions_spark PROPERTY JOB_POOL_COMPILE diff --git a/velox/functions/sparksql/JsonObjectKeys.h b/velox/functions/sparksql/JsonObjectKeys.h new file mode 100644 index 000000000000..a320cbb08413 --- /dev/null +++ b/velox/functions/sparksql/JsonObjectKeys.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "velox/functions/prestosql/json/SIMDJsonUtil.h" + +namespace facebook::velox::functions::sparksql { + +/// json_object_keys(jsonString) -> array(string) +/// +/// Returns all the keys of the outermost JSON object as an array if a valid +/// JSON object is given. 
+template +struct JsonObjectKeysFunction { + VELOX_DEFINE_FUNCTION_TYPES(T); + + FOLLY_ALWAYS_INLINE bool call( + out_type>& out, + const arg_type& json) { + simdjson::ondemand::document jsonDoc; + + simdjson::padded_string paddedJson(json.data(), json.size()); + // The result is NULL if the given string is not a valid JSON string. + if (simdjsonParse(paddedJson).get(jsonDoc)) { + return false; + } + + // The result is NULL if the given string is not a JSON object string. + if (jsonDoc.type() != simdjson::ondemand::json_type::object) { + return false; + } + + simdjson::ondemand::object jsonObject; + // The result is NULL if the given string is not a valid JSON object string. + if (jsonDoc.get_object().get(jsonObject)) { + return false; + } + + for (auto field : jsonObject) { + out.add_item().copy_from(std::string_view(field.unescaped_key())); + } + return true; + } +}; + +} // namespace facebook::velox::functions::sparksql diff --git a/velox/functions/sparksql/Register.cpp b/velox/functions/sparksql/Register.cpp index dbf129271750..1ff0ab9115de 100644 --- a/velox/functions/sparksql/Register.cpp +++ b/velox/functions/sparksql/Register.cpp @@ -35,6 +35,7 @@ #include "velox/functions/sparksql/DateTimeFunctions.h" #include "velox/functions/sparksql/Hash.h" #include "velox/functions/sparksql/In.h" +#include "velox/functions/sparksql/JsonObjectKeys.h" #include "velox/functions/sparksql/LeastGreatest.h" #include "velox/functions/sparksql/MightContain.h" #include "velox/functions/sparksql/MonotonicallyIncreasingId.h" @@ -175,6 +176,9 @@ void registerFunctions(const std::string& prefix) { registerRegexpReplace(prefix); + registerFunction, Varchar>( + {prefix + "json_object_keys"}); + // Register string functions. 
registerFunction({prefix + "chr"}); registerFunction({prefix + "ascii"}); diff --git a/velox/functions/sparksql/tests/CMakeLists.txt b/velox/functions/sparksql/tests/CMakeLists.txt index 0adacad86f1f..016f39e9b9c7 100644 --- a/velox/functions/sparksql/tests/CMakeLists.txt +++ b/velox/functions/sparksql/tests/CMakeLists.txt @@ -32,6 +32,7 @@ add_executable( ElementAtTest.cpp HashTest.cpp InTest.cpp + JsonObjectKeysTest.cpp LeastGreatestTest.cpp MakeDecimalTest.cpp MakeTimestampTest.cpp diff --git a/velox/functions/sparksql/tests/JsonObjectKeysTest.cpp b/velox/functions/sparksql/tests/JsonObjectKeysTest.cpp new file mode 100644 index 000000000000..79a608503c08 --- /dev/null +++ b/velox/functions/sparksql/tests/JsonObjectKeysTest.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+#include "velox/functions/sparksql/tests/SparkFunctionBaseTest.h"
+
+using namespace facebook::velox::test;
+
+namespace facebook::velox::functions::sparksql::test {
+namespace {
+
+class JsonObjectKeysTest : public SparkFunctionBaseTest {
+ protected:
+  // Evaluates json_object_keys over a single-row VARCHAR column holding
+  // 'json' and returns the resulting vector.
+  VectorPtr jsonObjectKeys(const std::string& json) {
+    auto varcharVector = makeFlatVector<std::string>({json});
+    return evaluate("json_object_keys(c0)", makeRowVector({varcharVector}));
+  }
+};
+
+TEST_F(JsonObjectKeysTest, basic) {
+  // Keys of a JSON object are returned in document order.
+  auto expected =
+      makeArrayVectorFromJson<std::string>({"[\"name\",\"age\",\"id\"]"});
+  assertEqualVectors(
+      jsonObjectKeys(R"({"name": "Alice", "age": 5, "id": "001"})"), expected);
+
+  // An empty object yields an empty (non-null) array.
+  expected = makeArrayVectorFromJson<std::string>({"[]"});
+  assertEqualVectors(jsonObjectKeys(R"({})"), expected);
+
+  // Valid JSON values that are not objects yield NULL.
+  expected = makeNullableArrayVector<std::string>({std::nullopt});
+  assertEqualVectors(jsonObjectKeys(R"(1)"), expected);
+  assertEqualVectors(jsonObjectKeys(R"("hello")"), expected);
+  assertEqualVectors(jsonObjectKeys(R"("")"), expected);
+  assertEqualVectors(jsonObjectKeys(R"([1, 2, 3])"), expected);
+
+  // An empty input string is not valid JSON and yields NULL.
+  assertEqualVectors(jsonObjectKeys(""), expected);
+}
+
+} // namespace
+} // namespace facebook::velox::functions::sparksql::test