Skip to content

Commit

Permalink
Add skewness Spark agg function
Browse files Browse the repository at this point in the history
  • Loading branch information
liujiayi771 committed Dec 19, 2023
1 parent fa6de06 commit 4b5efd3
Show file tree
Hide file tree
Showing 12 changed files with 733 additions and 459 deletions.
6 changes: 6 additions & 0 deletions velox/docs/functions/spark/aggregate.rst
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,12 @@ General Aggregate Functions

Returns b

.. spark:function:: skewness(x) -> double
Returns the skewness of all input values. When the count of `x` is greater than or equal to 1,
a non-null output will be generated. When the value of `m2` in the accumulator is 0, a null
output will be generated.

.. spark:function:: sum(x) -> bigint|double|real
Returns the sum of `x`.
Expand Down
4 changes: 4 additions & 0 deletions velox/exec/tests/SparkAggregationFuzzerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ int main(int argc, char** argv) {
// https://github.com/facebookincubator/velox/issues/7677
"max_by",
"min_by",
// The skewness functions of Velox and DuckDB use different
// algorithms.
// https://github.com/facebookincubator/velox/issues/4845
"skewness",
});

using Runner = facebook::velox::exec::test::AggregationFuzzerRunner;
Expand Down
6 changes: 4 additions & 2 deletions velox/functions/lib/aggregates/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

add_library(velox_functions_aggregates SingleValueAccumulator.cpp
AverageAggregateBase.cpp ValueSet.cpp)
add_library(
velox_functions_aggregates
AverageAggregateBase.cpp CentralMomentsAggregatesBase.cpp
SingleValueAccumulator.cpp ValueSet.cpp)

target_link_libraries(velox_functions_aggregates velox_exec
velox_presto_serializer Folly::folly)
Expand Down
52 changes: 52 additions & 0 deletions velox/functions/lib/aggregates/CentralMomentsAggregatesBase.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "velox/functions/lib/aggregates/CentralMomentsAggregatesBase.h"

namespace facebook::velox::functions::aggregate {

void checkAccumulatorRowType(
const TypePtr& type,
const std::string& errorMessage) {
VELOX_CHECK_EQ(type->kind(), TypeKind::ROW, "{}", errorMessage);
VELOX_CHECK_EQ(
type->childAt(kCentralMomentsIndices.count)->kind(),
TypeKind::BIGINT,
"{}",
errorMessage);
VELOX_CHECK_EQ(
type->childAt(kCentralMomentsIndices.m1)->kind(),
TypeKind::DOUBLE,
"{}",
errorMessage);
VELOX_CHECK_EQ(
type->childAt(kCentralMomentsIndices.m2)->kind(),
TypeKind::DOUBLE,
"{}",
errorMessage);
VELOX_CHECK_EQ(
type->childAt(kCentralMomentsIndices.m3)->kind(),
TypeKind::DOUBLE,
"{}",
errorMessage);
VELOX_CHECK_EQ(
type->childAt(kCentralMomentsIndices.m4)->kind(),
TypeKind::DOUBLE,
"{}",
errorMessage);
}

} // namespace facebook::velox::functions::aggregate
Loading

0 comments on commit 4b5efd3

Please sign in to comment.