Skip to content

Commit

Permalink
feat: expose centroids in approx_percentile_cont fluent api (apache#1…
Browse files Browse the repository at this point in the history
…1878)

* feat: expose centroids in approx_percentile_cont fluent api

Closes apache#11877

* avoid repeated import prefix in function signature

* update test_fn_approx_percentile_cont so that adjusting centroids changes the result
  • Loading branch information
Michael-J-Ward authored Aug 8, 2024
1 parent 20fbd88 commit 56f8e35
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 10 deletions.
19 changes: 17 additions & 2 deletions datafusion/core/tests/dataframe/dataframe_functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ async fn test_fn_approx_median() -> Result<()> {

#[tokio::test]
async fn test_fn_approx_percentile_cont() -> Result<()> {
let expr = approx_percentile_cont(col("b"), lit(0.5));
let expr = approx_percentile_cont(col("b"), lit(0.5), None);

let expected = [
"+---------------------------------------------+",
Expand All @@ -381,7 +381,7 @@ async fn test_fn_approx_percentile_cont() -> Result<()> {
None::<&str>,
"arg_2".to_string(),
));
let expr = approx_percentile_cont(col("b"), alias_expr);
let expr = approx_percentile_cont(col("b"), alias_expr, None);
let df = create_test_table().await?;
let expected = [
"+--------------------------------------+",
Expand All @@ -394,6 +394,21 @@ async fn test_fn_approx_percentile_cont() -> Result<()> {

assert_batches_eq!(expected, &batches);

// with number of centroids set
let expr = approx_percentile_cont(col("b"), lit(0.5), Some(lit(2)));
let expected = [
"+------------------------------------------------------+",
"| approx_percentile_cont(test.b,Float64(0.5),Int32(2)) |",
"+------------------------------------------------------+",
"| 30 |",
"+------------------------------------------------------+",
];

let df = create_test_table().await?;
let batches = df.aggregate(vec![], vec![expr]).unwrap().collect().await?;

assert_batches_eq!(expected, &batches);

Ok(())
}

Expand Down
22 changes: 15 additions & 7 deletions datafusion/functions-aggregate/src/approx_percentile_cont.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,21 @@ use datafusion_physical_expr_common::aggregate::tdigest::{
};
use datafusion_physical_expr_common::utils::limited_convert_logical_expr_to_physical_expr_with_dfschema;

make_udaf_expr_and_func!(
ApproxPercentileCont,
approx_percentile_cont,
expression percentile,
"Computes the approximate percentile continuous of a set of numbers",
approx_percentile_cont_udaf
);
create_func!(ApproxPercentileCont, approx_percentile_cont_udaf);

/// Builds an expression that computes the approximate percentile continuous
/// of a set of numbers.
///
/// The underlying UDAF is called with `expression` and `percentile`, in that
/// order. When `centroids` is `Some`, its value is passed as a third argument;
/// when it is `None`, only the two arguments are passed.
///
/// NOTE(review): `centroids` presumably controls the number of t-digest
/// centroids used by the approximation — confirm against the UDAF
/// implementation.
pub fn approx_percentile_cont(
    expression: Expr,
    percentile: Expr,
    centroids: Option<Expr>,
) -> Expr {
    // Start with the two mandatory arguments.
    let mut udaf_args = vec![expression, percentile];
    // `Option` iterates over zero or one item, so this appends the centroids
    // expression only when one was supplied.
    udaf_args.extend(centroids);
    approx_percentile_cont_udaf().call(udaf_args)
}

pub struct ApproxPercentileCont {
signature: Signature,
Expand Down
3 changes: 2 additions & 1 deletion datafusion/proto/tests/cases/roundtrip_logical_plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -887,7 +887,8 @@ async fn roundtrip_expr_api() -> Result<()> {
stddev_pop(lit(2.2)),
approx_distinct(lit(2)),
approx_median(lit(2)),
approx_percentile_cont(lit(2), lit(0.5)),
approx_percentile_cont(lit(2), lit(0.5), None),
approx_percentile_cont(lit(2), lit(0.5), Some(lit(50))),
approx_percentile_cont_with_weight(lit(2), lit(1), lit(0.5)),
grouping(lit(1)),
bit_and(lit(2)),
Expand Down

0 comments on commit 56f8e35

Please sign in to comment.