Skip to content

Commit

Permalink
Add Spark json_object_keys function (facebookincubator#10449)
Browse files Browse the repository at this point in the history
Summary:
A function which returns all the keys of the outermost JSON object.

Spark document:
https://spark.apache.org/docs/latest/api/sql/#json_object_keys
Spark implementation:
https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala#L1010-L1080

Pull Request resolved: facebookincubator#10449

Reviewed By: DanielHunte

Differential Revision: D60145866

Pulled By: kevinwilfong

fbshipit-source-id: 34982d023ba220d1a7f420f72d25fa81cf6ea137
  • Loading branch information
leoluan2009 authored and facebook-github-bot committed Jul 24, 2024
1 parent e5671c0 commit 82bde6d
Show file tree
Hide file tree
Showing 7 changed files with 127 additions and 1 deletion.
13 changes: 13 additions & 0 deletions velox/docs/functions/spark/json.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
==============
JSON Functions
==============

.. spark:function:: json_object_keys(jsonString) -> array(string)
Returns all the keys of the outermost JSON object as an array if a valid JSON object is given. Returns NULL if the input is any other valid JSON value (for example a number or a string), an invalid JSON string, or an empty string. ::

SELECT json_object_keys('{}'); -- []
SELECT json_object_keys('{"name": "Alice", "age": 5, "id": "001"}'); -- ['name', 'age', 'id']
SELECT json_object_keys(''); -- NULL
SELECT json_object_keys(1); -- NULL
SELECT json_object_keys('"hello"'); -- NULL
1 change: 1 addition & 0 deletions velox/docs/spark_functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Spark Functions
functions/spark/window
functions/spark/conversion
functions/spark/url
functions/spark/json

Here is a list of all scalar and aggregate Spark functions available in Velox.
Function names link to function descriptions. Check out coverage maps
Expand Down
3 changes: 2 additions & 1 deletion velox/functions/sparksql/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ velox_link_libraries(
velox_functions_spark_specialforms
velox_is_null_functions
velox_functions_util
Folly::folly)
Folly::folly
simdjson::simdjson)

if(NOT VELOX_MONO_LIBRARY)
set_property(TARGET velox_functions_spark PROPERTY JOB_POOL_COMPILE
Expand Down
59 changes: 59 additions & 0 deletions velox/functions/sparksql/JsonObjectKeys.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include "velox/functions/prestosql/json/SIMDJsonUtil.h"

namespace facebook::velox::functions::sparksql {

/// json_object_keys(jsonString) -> array(string)
///
/// Returns all the keys of the outermost JSON object as an array if a valid
/// JSON object is given. Returns NULL if the input is empty, cannot be parsed
/// as JSON, or is a JSON value other than an object.
template <typename T>
struct JsonObjectKeysFunction {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  /// @param out Array writer that receives one element per key, in the order
  /// the keys are encountered while iterating the object.
  /// @param json Input JSON text.
  /// @return true if 'out' holds the keys; false to produce a NULL result.
  ///
  /// Every simdjson result is checked explicitly with get() instead of relying
  /// on simdjson's implicit conversions, which throw simdjson_error on
  /// failure. This way malformed input yields NULL rather than an exception.
  FOLLY_ALWAYS_INLINE bool call(
      out_type<Array<Varchar>>& out,
      const arg_type<Varchar>& json) {
    simdjson::ondemand::document jsonDoc;

    // Copy the input into a padded buffer before handing it to the parser.
    simdjson::padded_string paddedJson(json.data(), json.size());
    // The result is NULL if the given string is not a valid JSON string.
    if (simdjsonParse(paddedJson).get(jsonDoc)) {
      return false;
    }

    // The result is NULL if the type of the top-level value cannot be
    // determined or if it is not a JSON object.
    simdjson::ondemand::json_type type;
    if (jsonDoc.type().get(type) ||
        type != simdjson::ondemand::json_type::object) {
      return false;
    }

    simdjson::ondemand::object jsonObject;
    // The result is NULL if the given string is not a valid JSON object string.
    if (jsonDoc.get_object().get(jsonObject)) {
      return false;
    }

    for (auto field : jsonObject) {
      // On-demand parsing reports malformed content lazily, so an error can
      // surface while iterating; treat it as invalid JSON.
      // NOTE(review): keys already appended to 'out' are expected to be
      // discarded by the function framework when false is returned - confirm.
      std::string_view key;
      if (field.unescaped_key().get(key)) {
        return false;
      }
      out.add_item().copy_from(key);
    }
    return true;
  }
};

} // namespace facebook::velox::functions::sparksql
4 changes: 4 additions & 0 deletions velox/functions/sparksql/Register.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "velox/functions/sparksql/DateTimeFunctions.h"
#include "velox/functions/sparksql/Hash.h"
#include "velox/functions/sparksql/In.h"
#include "velox/functions/sparksql/JsonObjectKeys.h"
#include "velox/functions/sparksql/LeastGreatest.h"
#include "velox/functions/sparksql/MightContain.h"
#include "velox/functions/sparksql/MonotonicallyIncreasingId.h"
Expand Down Expand Up @@ -175,6 +176,9 @@ void registerFunctions(const std::string& prefix) {

registerRegexpReplace(prefix);

registerFunction<JsonObjectKeysFunction, Array<Varchar>, Varchar>(
{prefix + "json_object_keys"});

// Register string functions.
registerFunction<sparksql::ChrFunction, Varchar, int64_t>({prefix + "chr"});
registerFunction<AsciiFunction, int32_t, Varchar>({prefix + "ascii"});
Expand Down
1 change: 1 addition & 0 deletions velox/functions/sparksql/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ add_executable(
ElementAtTest.cpp
HashTest.cpp
InTest.cpp
JsonObjectKeysTest.cpp
LeastGreatestTest.cpp
MakeDecimalTest.cpp
MakeTimestampTest.cpp
Expand Down
47 changes: 47 additions & 0 deletions velox/functions/sparksql/tests/JsonObjectKeysTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "velox/functions/sparksql/tests/SparkFunctionBaseTest.h"

using namespace facebook::velox::test;

namespace facebook::velox::functions::sparksql::test {
namespace {

/// Test fixture for the Spark json_object_keys function.
class JsonObjectKeysTest : public SparkFunctionBaseTest {
 protected:
  /// Evaluates json_object_keys over a single-row input whose only column
  /// (c0) holds 'json', and returns the resulting vector.
  VectorPtr jsonObjectKeys(const std::string& json) {
    auto input = makeRowVector({makeFlatVector<std::string>({json})});
    return evaluate("json_object_keys(c0)", input);
  }
};

// Covers an object with several keys, an empty object, and valid JSON values
// that are not objects (expected to produce NULL).
TEST_F(JsonObjectKeysTest, basic) {
  // Object with several keys: keys are returned in document order.
  const auto threeKeys =
      makeArrayVectorFromJson<std::string>({"[\"name\",\"age\",\"id\"]"});
  assertEqualVectors(
      jsonObjectKeys(R"({"name": "Alice", "age": 5, "id": "001"})"),
      threeKeys);

  // Empty object: empty array, not NULL.
  const auto noKeys = makeArrayVectorFromJson<std::string>({"[]"});
  assertEqualVectors(jsonObjectKeys(R"({})"), noKeys);

  // Valid JSON values that are not objects: NULL.
  const auto nullResult =
      makeNullableArrayVector<std::string>({std::nullopt});
  for (const char* nonObject : {R"(1)", R"("hello")", R"("")"}) {
    SCOPED_TRACE(nonObject);
    assertEqualVectors(jsonObjectKeys(nonObject), nullResult);
  }
}

} // namespace
} // namespace facebook::velox::functions::sparksql::test

0 comments on commit 82bde6d

Please sign in to comment.