Skip to content

Commit

Permalink
Add array_size Spark function (facebookincubator#9516)
Browse files Browse the repository at this point in the history
Summary:
Returns size of the array.

Spark function doc - https://spark.apache.org/docs/latest/api/sql/index.html#array_size

Pull Request resolved: facebookincubator#9516

Reviewed By: pedroerp

Differential Revision: D56524603

Pulled By: Yuhta

fbshipit-source-id: ec856bda0c460ee4286016cffc52f31a505db775
  • Loading branch information
acvictor authored and facebook-github-bot committed Apr 25, 2024
1 parent 61d718d commit 6c0bcb4
Show file tree
Hide file tree
Showing 5 changed files with 126 additions and 0 deletions.
6 changes: 6 additions & 0 deletions velox/docs/functions/spark/array.rst
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,12 @@ Array Functions
SELECT array_repeat(100, 0); -- []
SELECT array_repeat(100, -1); -- []

.. spark:function:: array_size(array(E)) -> integer

    Returns the size of the array. ::

        SELECT array_size(array(1, 2, 3)); -- 3

.. spark:function:: array_sort(array(E)) -> array(E)
Returns an array which has the sorted order of the input array(E). The elements of array(E) must
Expand Down
34 changes: 34 additions & 0 deletions velox/functions/sparksql/ArraySizeFunction.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cmath>
#include <type_traits>
#include "velox/functions/Macros.h"

namespace facebook::velox::functions::sparksql {

/// Simple function backing Spark SQL's array_size: reports the number of
/// elements held by the input array.
///
/// @tparam TExec Template argument forwarded to VELOX_DEFINE_FUNCTION_TYPES.
template <typename TExec>
struct ArraySizeFunction {
  VELOX_DEFINE_FUNCTION_TYPES(TExec);

  /// Writes the element count of `inputArray` into `result`.
  ///
  /// @param result Receives inputArray.size() converted to int32_t.
  /// @param inputArray Array argument with a generic (Any) element type.
  FOLLY_ALWAYS_INLINE void call(
      int32_t& result,
      const arg_type<velox::Array<Any>>& inputArray) {
    // Same conversion the plain assignment performed, spelled out explicitly.
    result = static_cast<int32_t>(inputArray.size());
  }
};
} // namespace facebook::velox::functions::sparksql
4 changes: 4 additions & 0 deletions velox/functions/sparksql/Register.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "velox/functions/prestosql/DateTimeFunctions.h"
#include "velox/functions/prestosql/StringFunctions.h"
#include "velox/functions/sparksql/ArrayMinMaxFunction.h"
#include "velox/functions/sparksql/ArraySizeFunction.h"
#include "velox/functions/sparksql/ArraySort.h"
#include "velox/functions/sparksql/Bitwise.h"
#include "velox/functions/sparksql/DateTimeFunctions.h"
Expand Down Expand Up @@ -152,6 +153,9 @@ inline void registerArrayMinMaxFunctions(const std::string& prefix) {
void registerFunctions(const std::string& prefix) {
registerAllSpecialFormGeneralFunctions();

registerFunction<sparksql::ArraySizeFunction, int32_t, Array<Any>>(
{prefix + "array_size"});

// Register size functions
registerSize(prefix + "size");

Expand Down
81 changes: 81 additions & 0 deletions velox/functions/sparksql/tests/ArraySizeTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <gtest/gtest.h>
#include <limits>
#include <optional>
#include "velox/functions/sparksql/tests/SparkFunctionBaseTest.h"
#include "velox/type/Timestamp.h"

using namespace facebook::velox;
using namespace facebook::velox::test;
using namespace facebook::velox::functions::test;

namespace facebook::velox::functions::sparksql::test {
namespace {

/// Test fixture providing a helper that runs "array_size(c0)" against a
/// single array value.
class ArraySizeTest : public SparkFunctionBaseTest {
 protected:
  /// Wraps `input` as the only array of column c0, evaluates
  /// "array_size(c0)" on it and returns the computed size.
  ///
  /// @param input Array elements; std::nullopt entries become null elements.
  /// @return The evaluated size. `.value()` is called on the evaluation
  /// result, so an absent result is not tolerated by this helper.
  template <typename T>
  int32_t arraySize(const std::vector<std::optional<T>>& input) {
    // Outer vector holds exactly one array: the one under test.
    const std::vector<std::vector<std::optional<T>>> arrays{input};
    auto arrayColumn = makeNullableArrayVector(arrays);
    auto data = makeRowVector({arrayColumn});

    const auto size = evaluateOnce<int32_t>("array_size(c0)", data);
    return size.value();
  }
};

// array_size over boolean arrays; a null element is expected to be counted
// like any other element.
TEST_F(ArraySizeTest, boolean) {
  using Elements = std::vector<std::optional<bool>>;

  const Elements twoValues{true, false};
  const Elements oneValue{true};
  const Elements noValues{};
  const Elements threeValuesAndNull{true, false, true, std::nullopt};

  EXPECT_EQ(arraySize<bool>(twoValues), 2);
  EXPECT_EQ(arraySize<bool>(oneValue), 1);
  EXPECT_EQ(arraySize<bool>(noValues), 0);
  EXPECT_EQ(arraySize<bool>(threeValuesAndNull), 4);
}

// array_size over int8_t arrays: empty, single value, single null, and a
// null mixed with a value.
TEST_F(ArraySizeTest, integer) {
  using Elements = std::vector<std::optional<int8_t>>;

  const Elements noValues{};
  const Elements oneValue{1};
  const Elements onlyNull{std::nullopt};
  const Elements nullThenValue{std::nullopt, 1};

  EXPECT_EQ(arraySize<int8_t>(noValues), 0);
  EXPECT_EQ(arraySize<int8_t>(oneValue), 1);
  EXPECT_EQ(arraySize<int8_t>(onlyNull), 1);
  EXPECT_EQ(arraySize<int8_t>(nullThenValue), 2);
}

// array_size over float arrays: empty, single value, single null, and a
// null mixed with a value.
TEST_F(ArraySizeTest, float) {
  using Elements = std::vector<std::optional<float>>;

  const Elements noValues{};
  const Elements oneValue{1.1};
  const Elements onlyNull{std::nullopt};
  const Elements nullThenValue{std::nullopt, 1.1};

  EXPECT_EQ(arraySize<float>(noValues), 0);
  EXPECT_EQ(arraySize<float>(oneValue), 1);
  EXPECT_EQ(arraySize<float>(onlyNull), 1);
  EXPECT_EQ(arraySize<float>(nullThenValue), 2);
}

// array_size over string arrays, including arrays that contain a null
// element.
TEST_F(ArraySizeTest, varchar) {
  using Elements = std::vector<std::optional<std::string>>;

  const Elements twoColors{"red", "blue"};
  const Elements nullAndThreeColors{std::nullopt, "blue", "yellow", "orange"};
  const Elements noValues{};
  const Elements onlyNull{std::nullopt};

  EXPECT_EQ(arraySize<std::string>(twoColors), 2);
  EXPECT_EQ(arraySize<std::string>(nullAndThreeColors), 4);
  EXPECT_EQ(arraySize<std::string>(noValues), 0);
  EXPECT_EQ(arraySize<std::string>(onlyNull), 1);
}

// array_size over an array of two day counts obtained from date strings.
// NOTE(review): the elements are passed as int32_t values; whether the
// resulting array is DATE-typed rather than INTEGER-typed is not visible
// here -- confirm if DATE coverage is the intent.
TEST_F(ArraySizeTest, date) {
  auto toDays = [](const std::string& text) { return DATE()->toDays(text); };

  const std::vector<std::optional<int32_t>> twoDates{
      toDays("1970-01-01"), toDays("2023-08-23")};

  EXPECT_EQ(arraySize<int32_t>(twoDates), 2);
}

// array_size over Timestamp arrays: empty, single null, and two values
// built from microsecond counts.
TEST_F(ArraySizeTest, timestamp) {
  using Elements = std::vector<std::optional<Timestamp>>;
  auto fromMicros = [](int64_t value) { return Timestamp::fromMicros(value); };

  const Elements noValues{};
  EXPECT_EQ(arraySize<Timestamp>(noValues), 0);

  const Elements onlyNull{std::nullopt};
  EXPECT_EQ(arraySize<Timestamp>(onlyNull), 1);

  const Elements twoValues{fromMicros(0), fromMicros(1)};
  EXPECT_EQ(arraySize<Timestamp>(twoValues), 2);
}
} // namespace
} // namespace facebook::velox::functions::sparksql::test
1 change: 1 addition & 0 deletions velox/functions/sparksql/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ add_executable(
ArithmeticTest.cpp
ArrayMaxTest.cpp
ArrayMinTest.cpp
ArraySizeTest.cpp
ArraySortTest.cpp
BitwiseTest.cpp
ComparisonsTest.cpp
Expand Down

0 comments on commit 6c0bcb4

Please sign in to comment.