From 9424f618012b20da58eb5ee7e8dab743734f5c07 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Sat, 28 Sep 2024 20:02:43 +0530 Subject: [PATCH 01/34] Adds macro for udwf singleton --- datafusion/functions-window/Cargo.toml | 1 + datafusion/functions-window/src/lib.rs | 2 + datafusion/functions-window/src/macros.rs | 39 +++++++++++++++++++ datafusion/functions-window/src/row_number.rs | 17 +------- 4 files changed, 44 insertions(+), 15 deletions(-) create mode 100644 datafusion/functions-window/src/macros.rs diff --git a/datafusion/functions-window/Cargo.toml b/datafusion/functions-window/Cargo.toml index 8dcec6bc964b..952e5720c77c 100644 --- a/datafusion/functions-window/Cargo.toml +++ b/datafusion/functions-window/Cargo.toml @@ -43,6 +43,7 @@ datafusion-expr = { workspace = true } datafusion-functions-window-common = { workspace = true } datafusion-physical-expr-common = { workspace = true } log = { workspace = true } +paste = "1.0.15" [dev-dependencies] arrow = { workspace = true } diff --git a/datafusion/functions-window/src/lib.rs b/datafusion/functions-window/src/lib.rs index 790a500f1f3f..62534b26062b 100644 --- a/datafusion/functions-window/src/lib.rs +++ b/datafusion/functions-window/src/lib.rs @@ -29,6 +29,8 @@ use log::debug; use datafusion_expr::registry::FunctionRegistry; use datafusion_expr::WindowUDF; +#[macro_use] +mod macros; pub mod row_number; /// Fluent-style API for creating `Expr`s diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs new file mode 100644 index 000000000000..0b51d53b641b --- /dev/null +++ b/datafusion/functions-window/src/macros.rs @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +macro_rules! create_udwf { + ($STRUCT_NAME:ident, $FN_NAME:ident, $CTOR:path) => { + paste::paste! { + /// Singleton instance of [$STRUCT_NAME], ensures the user-defined + /// window function is only created once. + /// + /// For example, `STATIC_RowNumber` + #[allow(non_upper_case_globals)] + static [<STATIC_ $STRUCT_NAME>]: std::sync::OnceLock<std::sync::Arc<datafusion_expr::WindowUDF>> = + std::sync::OnceLock::new(); + + /// Returns a [`WindowUDF`](datafusion_expr::WindowUDF) for [$STRUCT_NAME] + pub fn [<$FN_NAME _udwf>]() -> std::sync::Arc<datafusion_expr::WindowUDF> { + [<STATIC_ $STRUCT_NAME>] + .get_or_init(|| { + std::sync::Arc::new(datafusion_expr::WindowUDF::from($CTOR())) + }) + .clone() + } + } + } +} diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs index 7f348bf9d2a0..d9502a614d4d 100644 --- a/datafusion/functions-window/src/row_number.rs +++ b/datafusion/functions-window/src/row_number.rs @@ -32,27 +32,14 @@ use datafusion_expr::{Expr, PartitionEvaluator, Signature, Volatility, WindowUDF use datafusion_functions_window_common::field; use field::WindowUDFFieldArgs; +create_udwf!(RowNumber, row_number, RowNumber::default); + /// Create a [`WindowFunction`](Expr::WindowFunction) expression for /// `row_number` user-defined window function. pub fn row_number() -> Expr { Expr::WindowFunction(WindowFunction::new(row_number_udwf(), vec![])) } -/// Singleton instance of `row_number`, ensures the UDWF is only created once. 
-#[allow(non_upper_case_globals)] -static STATIC_RowNumber: std::sync::OnceLock<std::sync::Arc<datafusion_expr::WindowUDF>> = - std::sync::OnceLock::new(); - -/// Returns a [`WindowUDF`](datafusion_expr::WindowUDF) for `row_number` -/// user-defined window function. -pub fn row_number_udwf() -> std::sync::Arc<datafusion_expr::WindowUDF> { - STATIC_RowNumber - .get_or_init(|| { - std::sync::Arc::new(datafusion_expr::WindowUDF::from(RowNumber::default())) - }) - .clone() -} - /// row_number expression #[derive(Debug)] pub struct RowNumber { From 31819af3ee6b3d51ed58c42cc4cbddf66c23ceed Mon Sep 17 00:00:00 2001 From: jcsherin Date: Sat, 28 Sep 2024 22:27:29 +0530 Subject: [PATCH 02/34] Adds a doc comment parameter to macro --- datafusion/functions-window/src/macros.rs | 12 ++++++------ datafusion/functions-window/src/row_number.rs | 7 ++++++- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index 0b51d53b641b..3ccdfe941c1c 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -16,17 +16,17 @@ // under the License. macro_rules! create_udwf { - ($STRUCT_NAME:ident, $FN_NAME:ident, $CTOR:path) => { + ($STRUCT_NAME:ident, $FN_NAME:ident, $DOC:expr, $CTOR:path) => { paste::paste! { - /// Singleton instance of [$STRUCT_NAME], ensures the user-defined - /// window function is only created once. 
- /// - /// For example, `STATIC_RowNumber` + #[doc = concat!(" Singleton instance of [`", stringify!($STRUCT_NAME), "`], ensures the user-defined")] + #[doc = concat!(" window function is only created once.")] #[allow(non_upper_case_globals)] static [<STATIC_ $STRUCT_NAME>]: std::sync::OnceLock<std::sync::Arc<datafusion_expr::WindowUDF>> = std::sync::OnceLock::new(); - /// Returns a [`WindowUDF`](datafusion_expr::WindowUDF) for [$STRUCT_NAME] + #[doc = concat!(" Returns a [`WindowUDF`](datafusion_expr::WindowUDF) for [`", stringify!($STRUCT_NAME), "`].")] + #[doc = ""] + #[doc = concat!(" ", $DOC)] pub fn [<$FN_NAME _udwf>]() -> std::sync::Arc<datafusion_expr::WindowUDF> { [<STATIC_ $STRUCT_NAME>] .get_or_init(|| { diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs index d9502a614d4d..61f58a6a1ac7 100644 --- a/datafusion/functions-window/src/row_number.rs +++ b/datafusion/functions-window/src/row_number.rs @@ -32,7 +32,12 @@ use datafusion_expr::{Expr, PartitionEvaluator, Signature, Volatility, WindowUDF use datafusion_functions_window_common::field; use field::WindowUDFFieldArgs; -create_udwf!(RowNumber, row_number, RowNumber::default); +create_udwf!( + RowNumber, + row_number, + "Returns a unique row number for each row in window partition beginning at 1.", + RowNumber::default +); /// Create a [`WindowFunction`](Expr::WindowFunction) expression for /// `row_number` user-defined window function. From 569b5d612247f449ef7ea765b49f6b245d082f4b Mon Sep 17 00:00:00 2001 From: jcsherin Date: Sat, 28 Sep 2024 22:59:55 +0530 Subject: [PATCH 03/34] Add doc comment for `create_udwf` macro --- datafusion/functions-window/src/macros.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index 3ccdfe941c1c..60238ddb3961 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -15,6 +15,15 @@ // specific language governing permissions and limitations // under the License. 
+/// Creates a singleton instance of a user-defined window function. +/// +/// # Parameters +/// +/// * `$STRUCT_NAME`: The user-defined window function struct. +/// * `$FN_NAME`: The prefix for the generated function name. +/// * `$DOC`: The doc comment for the user-defined window function. +/// * `$CTOR`: The user-defined window function constructor. +/// macro_rules! create_udwf { ($STRUCT_NAME:ident, $FN_NAME:ident, $DOC:expr, $CTOR:path) => { paste::paste! { From 54564136a66b01926adeadee1e99fd8ac1d11ed3 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Sat, 28 Sep 2024 23:10:05 +0530 Subject: [PATCH 04/34] Uses default constructor --- datafusion/functions-window/src/macros.rs | 6 +++++- datafusion/functions-window/src/row_number.rs | 3 +-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index 60238ddb3961..d13c42e5fe62 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -25,6 +25,10 @@ /// * `$CTOR`: The user-defined window function constructor. /// macro_rules! create_udwf { + ($STRUCT_NAME:ident, $FN_NAME:ident, $DOC:expr) => { + create_udwf!($STRUCT_NAME, $FN_NAME, $DOC, $STRUCT_NAME::default); + }; + ($STRUCT_NAME:ident, $FN_NAME:ident, $DOC:expr, $CTOR:path) => { paste::paste! { #[doc = concat!(" Singleton instance of [`", stringify!($STRUCT_NAME), "`], ensures the user-defined")] @@ -44,5 +48,5 @@ macro_rules! 
create_udwf { .clone() } } - } + }; } diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs index 61f58a6a1ac7..d521f2fa0887 100644 --- a/datafusion/functions-window/src/row_number.rs +++ b/datafusion/functions-window/src/row_number.rs @@ -35,8 +35,7 @@ use field::WindowUDFFieldArgs; create_udwf!( RowNumber, row_number, - "Returns a unique row number for each row in window partition beginning at 1.", - RowNumber::default + "Returns a unique row number for each row in window partition beginning at 1." ); /// Create a [`WindowFunction`](Expr::WindowFunction) expression for From 88657cb186d6b9fcba00573b00dba80fe973f227 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Sat, 28 Sep 2024 23:12:55 +0530 Subject: [PATCH 05/34] Update `Cargo.lock` in `datafusion-cli` --- datafusion-cli/Cargo.lock | 159 ++++++++++++++------------------------ 1 file changed, 59 insertions(+), 100 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index fbe7d5c04b9b..a8b715588c66 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -424,9 +424,9 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.82" +version = "0.1.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a27b8a3a6e1a44fa4c8baf1f653e4172e81486d4941f2237e20dc2d0cf4ddff1" +checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" dependencies = [ "proc-macro2", "quote", @@ -450,15 +450,15 @@ checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" [[package]] name = "autocfg" -version = "1.3.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "aws-config" -version = "1.5.6" +version = "1.5.7" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "848d7b9b605720989929279fa644ce8f244d0ce3146fcca5b70e4eb7b3c020fc" +checksum = "8191fb3091fa0561d1379ef80333c3c7191c6f0435d986e85821bcf7acbd1126" dependencies = [ "aws-credential-types", "aws-runtime", @@ -523,9 +523,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.43.0" +version = "1.44.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70a9d27ed1c12b1140c47daf1bc541606c43fdafd918c4797d520db0043ceef2" +checksum = "0b90cfe6504115e13c41d3ea90286ede5aa14da294f3fe077027a6e83850843c" dependencies = [ "aws-credential-types", "aws-runtime", @@ -545,9 +545,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.44.0" +version = "1.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44514a6ca967686cde1e2a1b81df6ef1883d0e3e570da8d8bc5c491dcb6fc29b" +checksum = "167c0fad1f212952084137308359e8e4c4724d1c643038ce163f06de9662c1d0" dependencies = [ "aws-credential-types", "aws-runtime", @@ -567,9 +567,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.43.0" +version = "1.44.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd7a4d279762a35b9df97209f6808b95d4fe78547fe2316b4d200a0283960c5a" +checksum = "2cb5f98188ec1435b68097daa2a37d74b9d17c9caa799466338a8d1544e71b9d" dependencies = [ "aws-credential-types", "aws-runtime", @@ -707,9 +707,9 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.2.6" +version = "1.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03701449087215b5369c7ea17fef0dd5d24cb93439ec5af0c7615f58c3f22605" +checksum = "147100a7bea70fa20ef224a6bad700358305f5dc0f84649c53769761395b355b" dependencies = [ "base64-simd", "bytes", @@ -917,9 +917,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.21" +version = "1.1.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"07b1695e2c7e8fc85310cde85aeaab7e3097f593c91d209d3f9df76c928100f0" +checksum = "9540e661f81799159abee814118cc139a2004b3a3aa3ea37724a1b66530b90e0" dependencies = [ "jobserver", "libc", @@ -975,9 +975,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.17" +version = "4.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e5a21b8495e732f1b3c364c9949b201ca7bae518c502c80256c96ad79eaf6ac" +checksum = "b0956a43b323ac1afaffc053ed5c4b7c1f1800bacd1683c353aabbb752515dd3" dependencies = [ "clap_builder", "clap_derive", @@ -985,9 +985,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.17" +version = "4.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cf2dd12af7a047ad9d6da2b6b249759a22a7abc0f474c1dae1777afa4b21a73" +checksum = "4d72166dd41634086d5803a47eb71ae740e61d84709c36f3c34110173db3961b" dependencies = [ "anstream", "anstyle", @@ -997,9 +997,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.13" +version = "4.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "501d359d5f3dcaf6ecdeee48833ae73ec6e42723a1e52419c79abf9507eec0a0" +checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab" dependencies = [ "heck 0.5.0", "proc-macro2", @@ -1447,6 +1447,7 @@ dependencies = [ "datafusion-functions-window-common", "datafusion-physical-expr-common", "log", + "paste", ] [[package]] @@ -1722,9 +1723,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.33" +version = "1.0.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "324a1be68054ef05ad64b861cc9eaf1d623d2d8cb25b4bf2cb9cdd902b4bf253" +checksum = "a1b589b4dc103969ad3cf85c950899926ec64300a1a46d76c03a6072957036f0" dependencies = [ "crc32fast", "miniz_oxide", @@ -2137,9 +2138,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.8" +version = "0.1.9" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "da62f120a8a37763efb0cf8fdf264b884c7b8b9ac8660b900c8661030c00e6ba" +checksum = "41296eb09f183ac68eec06e03cdbea2e759633d4067b2f6552fc2e009bcad08b" dependencies = [ "bytes", "futures-channel", @@ -2150,7 +2151,6 @@ dependencies = [ "pin-project-lite", "socket2", "tokio", - "tower", "tower-service", "tracing", ] @@ -2333,9 +2333,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.158" +version = "0.2.159" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" +checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5" [[package]] name = "libflate" @@ -2799,26 +2799,6 @@ dependencies = [ "siphasher", ] -[[package]] -name = "pin-project" -version = "1.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bf43b791c5b9e34c3d182969b4abb522f9343702850a2e57f460d00d09b4b3" -dependencies = [ - "pin-project-internal", -] - -[[package]] -name = "pin-project-internal" -version = "1.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "pin-project-lite" version = "0.2.14" @@ -2833,9 +2813,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" +checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" [[package]] name = "powerfmt" @@ -2908,9 +2888,9 @@ checksum = "b76f1009795ca44bb5aaae8fd3f18953e209259c33d9b059b1f53d58ab7511db" [[package]] name = "quick-xml" -version = "0.36.1" +version = "0.36.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "96a05e2e8efddfa51a84ca47cec303fac86c8541b686d37cac5efc0e094417bc" +checksum = "f7649a7b4df05aed9ea7ec6f628c67c9953a43869b8bc50929569b2999d443fe" dependencies = [ "memchr", "serde", @@ -3015,9 +2995,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.4" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0884ad60e090bf1345b93da0a5de8923c93884cd03f40dfcfddd3b4bee661853" +checksum = "355ae415ccd3a04315d3f8246e86d67689ea74d88d915576e1589a351062a13b" dependencies = [ "bitflags 2.6.0", ] @@ -3289,9 +3269,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.8.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc0a2ce646f8655401bb81e7927b812614bd5d91dbc968696be50603510fcaf0" +checksum = "0e696e35370c65c9c541198af4543ccd580cf17fc25d8e05c5a242b202488c55" [[package]] name = "rustls-webpki" @@ -3397,9 +3377,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.11.1" +version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75da29fe9b9b08fe9d6b22b5b4bcbc75d8db3aa31e639aa56bb62e9d46bfceaf" +checksum = "ea4a292869320c0272d7bc55a5a6aafaff59b4f63404a003887b679a2e05b4b6" dependencies = [ "core-foundation-sys", "libc", @@ -3510,18 +3490,18 @@ checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] name = "snafu" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b835cb902660db3415a672d862905e791e54d306c6e8189168c7f3d9ae1c79d" +checksum = "223891c85e2a29c3fe8fb900c1fae5e69c2e42415e3177752e8718475efa5019" dependencies = [ "snafu-derive", ] [[package]] name = "snafu-derive" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"38d1e02fca405f6280643174a50c942219f0bbf4dbf7d480f1dd864d6f211ae5" +checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" dependencies = [ "heck 0.5.0", "proc-macro2", @@ -3633,9 +3613,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.77" +version = "2.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed" +checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590" dependencies = [ "proc-macro2", "quote", @@ -3653,9 +3633,9 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.12.0" +version = "3.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04cbcdd0c794ebb0d4cf35e88edd2f7d2c4c3e9a5a6dab322839b321c6a87a64" +checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b" dependencies = [ "cfg-if", "fastrand", @@ -3672,18 +3652,18 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" [[package]] name = "thiserror" -version = "1.0.63" +version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" +checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.63" +version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" +checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" dependencies = [ "proc-macro2", "quote", @@ -3826,36 +3806,15 @@ checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" [[package]] name = "toml_edit" -version = "0.22.21" +version = "0.22.22" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b072cee73c449a636ffd6f32bd8de3a9f7119139aff882f44943ce2986dc5cf" +checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" dependencies = [ "indexmap", "toml_datetime", "winnow", ] -[[package]] -name = "tower" -version = "0.4.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" -dependencies = [ - "futures-core", - "futures-util", - "pin-project", - "pin-project-lite", - "tokio", - "tower-layer", - "tower-service", -] - -[[package]] -name = "tower-layer" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" - [[package]] name = "tower-service" version = "0.3.3" @@ -3964,9 +3923,9 @@ checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" [[package]] name = "unicode-width" -version = "0.1.13" +version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" [[package]] name = "untrusted" @@ -4122,9 +4081,9 @@ checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" [[package]] name = "wasm-streams" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b65dc4c90b63b118468cf747d8bf3566c1913ef60be765b5730ead9e0a3ba129" +checksum = "4e072d4e72f700fb3443d8fe94a39315df013eef1104903cdb0a2abd322bbecd" dependencies = [ "futures-util", "js-sys", @@ -4341,9 +4300,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.6.18" +version = "0.6.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"68a9bda4691f099d435ad181000724da8e5899daa10713c2d432552b9ccd3a6f" +checksum = "36c1fec1a2bb5866f07c25f68c26e565c4c200aebb96d7e55710c19d3e8ac49b" dependencies = [ "memchr", ] From ca6ea447e6d751e3f90c74aee4fae18c53e3e27a Mon Sep 17 00:00:00 2001 From: jcsherin Date: Sat, 28 Sep 2024 23:18:05 +0530 Subject: [PATCH 06/34] Fixes: expand `$FN_NAME` in doc strings --- datafusion/functions-window/src/macros.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index d13c42e5fe62..50e07e9db518 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -31,13 +31,13 @@ macro_rules! create_udwf { ($STRUCT_NAME:ident, $FN_NAME:ident, $DOC:expr, $CTOR:path) => { paste::paste! { - #[doc = concat!(" Singleton instance of [`", stringify!($STRUCT_NAME), "`], ensures the user-defined")] + #[doc = concat!(" Singleton instance of [`", stringify!($FN_NAME), "`], ensures the user-defined")] #[doc = concat!(" window function is only created once.")] #[allow(non_upper_case_globals)] static []: std::sync::OnceLock> = std::sync::OnceLock::new(); - #[doc = concat!(" Returns a [`WindowUDF`](datafusion_expr::WindowUDF) for [`", stringify!($STRUCT_NAME), "`].")] + #[doc = concat!(" Returns a [`WindowUDF`](datafusion_expr::WindowUDF) for [`", stringify!($FN_NAME), "`].")] #[doc = ""] #[doc = concat!(" ", $DOC)] pub fn [<$FN_NAME _udwf>]() -> std::sync::Arc { From 0288a3695e92a33081a978dc744a210513894606 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Sun, 29 Sep 2024 14:14:48 +0530 Subject: [PATCH 07/34] Adds example for macro usage --- datafusion/functions-window/src/lib.rs | 2 +- datafusion/functions-window/src/macros.rs | 62 +++++++++++++++++++++-- 2 files changed, 60 insertions(+), 4 deletions(-) diff --git a/datafusion/functions-window/src/lib.rs b/datafusion/functions-window/src/lib.rs index 62534b26062b..6e98bb091446 100644 --- 
a/datafusion/functions-window/src/lib.rs +++ b/datafusion/functions-window/src/lib.rs @@ -30,7 +30,7 @@ use datafusion_expr::registry::FunctionRegistry; use datafusion_expr::WindowUDF; #[macro_use] -mod macros; +pub mod macros; pub mod row_number; /// Fluent-style API for creating `Expr`s diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index 50e07e9db518..7ac44d8c153a 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -20,10 +20,66 @@ /// # Parameters /// /// * `$STRUCT_NAME`: The user-defined window function struct. -/// * `$FN_NAME`: The prefix for the generated function name. -/// * `$DOC`: The doc comment for the user-defined window function. -/// * `$CTOR`: The user-defined window function constructor. +/// * `$FN_NAME`: The prefix for the generated function name. The +/// generated function name is `$FN_NAME_udwf`. +/// * `$DOC`: The doc comments. +/// * (optional) `$CTOR`: An optional user-defined window function +/// constructor. By default, this will resolve to +/// `$STRUCT_NAME::default()`. Use this argument to customize +/// the constructor. /// +/// # Example +/// +/// This shows the usage of the `make_udwf` macro which is used to +/// create a user-defined window function. 
+/// +/// ``` +/// use std::any::Any; +/// use datafusion_common::arrow::datatypes::{DataType, Field}; +/// use datafusion_expr::{PartitionEvaluator, Signature, Volatility, WindowUDFImpl}; +/// +/// use datafusion_functions_window_common::field::WindowUDFFieldArgs; +/// use datafusion_functions_window::create_udwf; +/// +/// #[derive(Debug)] +/// struct AddOne { +/// signature: Signature, +/// } +/// +/// impl Default for AddOne { +/// fn default() -> Self { +/// Self { +/// signature: Signature::numeric(1, Volatility::Immutable), +/// } +/// } +/// } +/// +/// impl WindowUDFImpl for AddOne { +/// fn as_any(&self) -> &dyn Any { +/// self +/// } +/// fn name(&self) -> &str { +/// "add_one" +/// } +/// fn signature(&self) -> &Signature { +/// &self.signature +/// } +/// fn partition_evaluator( +/// &self, +/// ) -> datafusion_common::Result<Box<dyn PartitionEvaluator>> { +/// unimplemented!("unnecessary for doc test") +/// } +/// fn field(&self, field_args: WindowUDFFieldArgs) -> datafusion_common::Result<Field> { +/// Ok(Field::new(field_args.name(), DataType::Int64, false)) +/// } +/// } +/// +/// /// This creates a singleton instance of `AddOne` user-defined +/// /// window function named `add_one_udwf()`. +/// create_udwf!(AddOne, add_one, "Adds one to each row value in window partition."); +/// ``` + +#[macro_export] macro_rules! 
create_udwf { ($STRUCT_NAME:ident, $FN_NAME:ident, $DOC:expr) => { create_udwf!($STRUCT_NAME, $FN_NAME, $DOC, $STRUCT_NAME::default); From 2446ffb110ada00014feb6738351531c01702ced Mon Sep 17 00:00:00 2001 From: jcsherin Date: Sun, 29 Sep 2024 14:15:34 +0530 Subject: [PATCH 08/34] Renames macro --- datafusion/functions-window/src/macros.rs | 8 ++++---- datafusion/functions-window/src/row_number.rs | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index 7ac44d8c153a..433a26c68b27 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -39,7 +39,7 @@ /// use datafusion_expr::{PartitionEvaluator, Signature, Volatility, WindowUDFImpl}; /// /// use datafusion_functions_window_common::field::WindowUDFFieldArgs; -/// use datafusion_functions_window::create_udwf; +/// use datafusion_functions_window::make_udwf_singleton; /// /// #[derive(Debug)] /// struct AddOne { @@ -76,13 +76,13 @@ /// /// /// This creates a singleton instance of `AddOne` user-defined /// /// window function named `add_one_udwf()`. -/// create_udwf!(AddOne, add_one, "Adds one to each row value in window partition."); +/// make_udwf_singleton!(AddOne, add_one, "Adds one to each row value in window partition."); /// ``` #[macro_export] -macro_rules! create_udwf { +macro_rules! 
make_udwf_singleton { ($STRUCT_NAME:ident, $FN_NAME:ident, $DOC:expr) => { - create_udwf!($STRUCT_NAME, $FN_NAME, $DOC, $STRUCT_NAME::default); + make_udwf_singleton!($STRUCT_NAME, $FN_NAME, $DOC, $STRUCT_NAME::default); }; ($STRUCT_NAME:ident, $FN_NAME:ident, $DOC:expr, $CTOR:path) => { diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs index d521f2fa0887..66e10f62084f 100644 --- a/datafusion/functions-window/src/row_number.rs +++ b/datafusion/functions-window/src/row_number.rs @@ -32,7 +32,7 @@ use datafusion_expr::{Expr, PartitionEvaluator, Signature, Volatility, WindowUDF use datafusion_functions_window_common::field; use field::WindowUDFFieldArgs; -create_udwf!( +make_udwf_singleton!( RowNumber, row_number, "Returns a unique row number for each row in window partition beginning at 1." From 771465a1eca3a9a7fe9ac8a6693989f75efd83ac Mon Sep 17 00:00:00 2001 From: jcsherin Date: Sun, 29 Sep 2024 15:39:19 +0530 Subject: [PATCH 09/34] Improve doc comments --- datafusion/functions-window/src/macros.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index 433a26c68b27..c147fa269326 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -15,7 +15,9 @@ // specific language governing permissions and limitations // under the License. -/// Creates a singleton instance of a user-defined window function. +/// Lazily initializes a user-defined window function exactly once +/// when called concurrently. Repeated calls return a reference to the +/// same instance. 
/// /// # Parameters /// From b662b289200fbd61c469d08e09a9754ce5d7460a Mon Sep 17 00:00:00 2001 From: jcsherin Date: Sun, 29 Sep 2024 15:51:34 +0530 Subject: [PATCH 10/34] Rename udwf macro --- datafusion/functions-window/src/macros.rs | 8 ++++---- datafusion/functions-window/src/row_number.rs | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index c147fa269326..a620d6af2d49 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -41,7 +41,7 @@ /// use datafusion_expr::{PartitionEvaluator, Signature, Volatility, WindowUDFImpl}; /// /// use datafusion_functions_window_common::field::WindowUDFFieldArgs; -/// use datafusion_functions_window::make_udwf_singleton; +/// use datafusion_functions_window::get_or_init_udwf; /// /// #[derive(Debug)] /// struct AddOne { @@ -78,13 +78,13 @@ /// /// /// This creates a singleton instance of `AddOne` user-defined /// /// window function named `add_one_udwf()`. -/// make_udwf_singleton!(AddOne, add_one, "Adds one to each row value in window partition."); +/// get_or_init_udwf!(AddOne, add_one, "Adds one to each row value in window partition."); /// ``` #[macro_export] -macro_rules! make_udwf_singleton { +macro_rules! 
get_or_init_udwf { ($STRUCT_NAME:ident, $FN_NAME:ident, $DOC:expr) => { - make_udwf_singleton!($STRUCT_NAME, $FN_NAME, $DOC, $STRUCT_NAME::default); + get_or_init_udwf!($STRUCT_NAME, $FN_NAME, $DOC, $STRUCT_NAME::default); }; ($STRUCT_NAME:ident, $FN_NAME:ident, $DOC:expr, $CTOR:path) => { diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs index 66e10f62084f..e55abcb405d0 100644 --- a/datafusion/functions-window/src/row_number.rs +++ b/datafusion/functions-window/src/row_number.rs @@ -32,7 +32,7 @@ use datafusion_expr::{Expr, PartitionEvaluator, Signature, Volatility, WindowUDF use datafusion_functions_window_common::field; use field::WindowUDFFieldArgs; -make_udwf_singleton!( +get_or_init_udwf!( RowNumber, row_number, "Returns a unique row number for each row in window partition beginning at 1." From 9e1b375023b6ddcbfb35a3c38c48324b0d0fa68c Mon Sep 17 00:00:00 2001 From: jcsherin Date: Sun, 29 Sep 2024 15:53:19 +0530 Subject: [PATCH 11/34] Minor: doc copy edits --- datafusion/functions-window/src/macros.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index a620d6af2d49..b8a6795d7b0e 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -32,9 +32,6 @@ /// /// # Example /// -/// This shows the usage of the `make_udwf` macro which is used to -/// create a user-defined window function. -/// /// ``` /// use std::any::Any; /// use datafusion_common::arrow::datatypes::{DataType, Field}; @@ -76,8 +73,7 @@ /// } /// } /// -/// /// This creates a singleton instance of `AddOne` user-defined -/// /// window function named `add_one_udwf()`. +/// /// This creates `add_one_udwf()` from `AddOne`. 
/// get_or_init_udwf!(AddOne, add_one, "Adds one to each row value in window partition."); /// ``` From 443747e27bd8d0fdebc8a092d766b5b1db53548e Mon Sep 17 00:00:00 2001 From: jcsherin Date: Sun, 29 Sep 2024 16:16:43 +0530 Subject: [PATCH 12/34] Adds macro for creating fluent-style expression API --- datafusion/functions-window/src/macros.rs | 14 ++++++++++++++ datafusion/functions-window/src/row_number.rs | 13 ++++++------- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index b8a6795d7b0e..86a33619c500 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -104,3 +104,17 @@ macro_rules! get_or_init_udwf { } }; } + +macro_rules! create_udwf_expr { + ($STRUCT_NAME:ident, $FN_NAME:ident, $DOC:expr) => { + paste::paste! { + #[doc = " Create a [`WindowFunction`](datafusion_expr::Expr::WindowFunction) expression for"] + #[doc = concat!(" [`", stringify!($STRUCT_NAME), "`] user-defined window function.")] + #[doc = ""] + #[doc = concat!(" ", $DOC)] + pub fn $FN_NAME() -> datafusion_expr::Expr { + [<$FN_NAME _udwf>]().call(vec![]) + } + } + }; +} diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs index e55abcb405d0..1e87e8fee517 100644 --- a/datafusion/functions-window/src/row_number.rs +++ b/datafusion/functions-window/src/row_number.rs @@ -27,8 +27,7 @@ use datafusion_common::arrow::compute::SortOptions; use datafusion_common::arrow::datatypes::DataType; use datafusion_common::arrow::datatypes::Field; use datafusion_common::{Result, ScalarValue}; -use datafusion_expr::expr::WindowFunction; -use datafusion_expr::{Expr, PartitionEvaluator, Signature, Volatility, WindowUDFImpl}; +use datafusion_expr::{PartitionEvaluator, Signature, Volatility, WindowUDFImpl}; use datafusion_functions_window_common::field; use field::WindowUDFFieldArgs; @@ -38,11 +37,11 @@ 
get_or_init_udwf!( "Returns a unique row number for each row in window partition beginning at 1." ); -/// Create a [`WindowFunction`](Expr::WindowFunction) expression for -/// `row_number` user-defined window function. -pub fn row_number() -> Expr { - Expr::WindowFunction(WindowFunction::new(row_number_udwf(), vec![])) -} +create_udwf_expr!( + RowNumber, + row_number, + "Returns a unique row number for each row in window partition beginning at 1." +); /// row_number expression #[derive(Debug)] From 4b747c1b4c4f2798e36e7c2fbe9a4d88f6686fdd Mon Sep 17 00:00:00 2001 From: jcsherin Date: Sun, 29 Sep 2024 23:03:14 +0530 Subject: [PATCH 13/34] Adds support for 1 or more parameters in expression function --- datafusion/functions-window/src/macros.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index 86a33619c500..cd8cf568a595 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -106,6 +106,7 @@ macro_rules! get_or_init_udwf { } macro_rules! create_udwf_expr { + // zero arguments ($STRUCT_NAME:ident, $FN_NAME:ident, $DOC:expr) => { paste::paste! { #[doc = " Create a [`WindowFunction`](datafusion_expr::Expr::WindowFunction) expression for"] @@ -117,4 +118,20 @@ macro_rules! create_udwf_expr { } } }; + + // 1 or more arguments + ($STRUCT_NAME:ident, $FN_NAME:ident, [$($PARAM:ident),+], $DOC:expr) => { + paste::paste! 
{ + #[doc = " Create a [`WindowFunction`](datafusion_expr::Expr::WindowFunction) expression for"] + #[doc = concat!(" [`", stringify!($STRUCT_NAME), "`] user-defined window function.")] + #[doc = ""] + #[doc = concat!(" ", $DOC)] + pub fn $FN_NAME( + $($PARAM: datafusion_expr::Expr),+ + ) -> datafusion_expr::Expr { + [<$FN_NAME _udwf>]() + .call(vec![$($PARAM),+]) + } + } + }; } From b73356adaeee5d83932e3509920316bce326f201 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Sun, 29 Sep 2024 23:20:00 +0530 Subject: [PATCH 14/34] Rewrite doc comments --- datafusion/functions-window/src/macros.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index cd8cf568a595..a3bc6710ab81 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -21,14 +21,14 @@ /// /// # Parameters /// -/// * `$STRUCT_NAME`: The user-defined window function struct. -/// * `$FN_NAME`: The prefix for the generated function name. The -/// generated function name is `$FN_NAME_udwf`. -/// * `$DOC`: The doc comments. -/// * (optional) `$CTOR`: An optional user-defined window function -/// constructor. By default, this will resolve to -/// `$STRUCT_NAME::default()`. Use this argument to customize -/// the constructor. +/// * `$STRUCT_NAME`: The struct which defines the [`Signature`](datafusion_expr::Signature) +/// of the user-defined window function. +/// * `$FN_NAME`: The basename to generate a unique function name like +/// `$FN_NAME_udwf`. +/// * `$DOC`: Description of user-defined window function. +/// * (optional) `$CTOR`: When none provided it automatically resolves +/// to `$STRUCT_NAME::default()` (default constructor). To customize +/// pass a different constructor. 
/// /// # Example /// From 50ae9eff9b8fc889de07f5f0bb4d888a06555766 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Sun, 29 Sep 2024 23:29:20 +0530 Subject: [PATCH 15/34] Rename parameters --- datafusion/functions-window/src/macros.rs | 41 ++++++++++++----------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index a3bc6710ab81..f3b4cf31e5e2 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -21,13 +21,13 @@ /// /// # Parameters /// -/// * `$STRUCT_NAME`: The struct which defines the [`Signature`](datafusion_expr::Signature) +/// * `$UDWF`: The struct which defines the [`Signature`](datafusion_expr::Signature) /// of the user-defined window function. -/// * `$FN_NAME`: The basename to generate a unique function name like -/// `$FN_NAME_udwf`. +/// * `$OUT_FN_NAME`: The basename to generate a unique function name like +/// `$OUT_FN_NAME_udwf`. /// * `$DOC`: Description of user-defined window function. /// * (optional) `$CTOR`: When none provided it automatically resolves -/// to `$STRUCT_NAME::default()` (default constructor). To customize +/// to `$UDWF::default()` (default constructor). To customize /// pass a different constructor. /// /// # Example @@ -79,23 +79,24 @@ #[macro_export] macro_rules! get_or_init_udwf { - ($STRUCT_NAME:ident, $FN_NAME:ident, $DOC:expr) => { - get_or_init_udwf!($STRUCT_NAME, $FN_NAME, $DOC, $STRUCT_NAME::default); + ($UDWF:ident, $OUT_FN_NAME:ident, $DOC:expr) => { + get_or_init_udwf!($UDWF, $OUT_FN_NAME, $DOC, $UDWF::default); }; - ($STRUCT_NAME:ident, $FN_NAME:ident, $DOC:expr, $CTOR:path) => { + ($UDWF:ident, $OUT_FN_NAME:ident, $DOC:expr, $CTOR:path) => { + paste::paste! 
{ - #[doc = concat!(" Singleton instance of [`", stringify!($FN_NAME), "`], ensures the user-defined")] + #[doc = concat!(" Singleton instance of [`", stringify!($OUT_FN_NAME), "`], ensures the user-defined")] #[doc = concat!(" window function is only created once.")] #[allow(non_upper_case_globals)] - static []: std::sync::OnceLock> = + static []: std::sync::OnceLock> = std::sync::OnceLock::new(); - #[doc = concat!(" Returns a [`WindowUDF`](datafusion_expr::WindowUDF) for [`", stringify!($FN_NAME), "`].")] + #[doc = concat!(" Returns a [`WindowUDF`](datafusion_expr::WindowUDF) for [`", stringify!($OUT_FN_NAME), "`].")] #[doc = ""] #[doc = concat!(" ", $DOC)] - pub fn [<$FN_NAME _udwf>]() -> std::sync::Arc { - [] + pub fn [<$OUT_FN_NAME _udwf>]() -> std::sync::Arc { + [] .get_or_init(|| { std::sync::Arc::new(datafusion_expr::WindowUDF::from($CTOR())) }) @@ -107,29 +108,29 @@ macro_rules! get_or_init_udwf { macro_rules! create_udwf_expr { // zero arguments - ($STRUCT_NAME:ident, $FN_NAME:ident, $DOC:expr) => { + ($UDWF:ident, $OUT_FN_NAME:ident, $DOC:expr) => { paste::paste! { #[doc = " Create a [`WindowFunction`](datafusion_expr::Expr::WindowFunction) expression for"] - #[doc = concat!(" [`", stringify!($STRUCT_NAME), "`] user-defined window function.")] + #[doc = concat!(" [`", stringify!($UDWF), "`] user-defined window function.")] #[doc = ""] #[doc = concat!(" ", $DOC)] - pub fn $FN_NAME() -> datafusion_expr::Expr { - [<$FN_NAME _udwf>]().call(vec![]) + pub fn $OUT_FN_NAME() -> datafusion_expr::Expr { + [<$OUT_FN_NAME _udwf>]().call(vec![]) } } }; // 1 or more arguments - ($STRUCT_NAME:ident, $FN_NAME:ident, [$($PARAM:ident),+], $DOC:expr) => { + ($UDWF:ident, $OUT_FN_NAME:ident, [$($PARAM:ident),+], $DOC:expr) => { paste::paste! 
{ #[doc = " Create a [`WindowFunction`](datafusion_expr::Expr::WindowFunction) expression for"] - #[doc = concat!(" [`", stringify!($STRUCT_NAME), "`] user-defined window function.")] + #[doc = concat!(" [`", stringify!($UDWF), "`] user-defined window function.")] #[doc = ""] #[doc = concat!(" ", $DOC)] - pub fn $FN_NAME( + pub fn $OUT_FN_NAME( $($PARAM: datafusion_expr::Expr),+ ) -> datafusion_expr::Expr { - [<$FN_NAME _udwf>]() + [<$OUT_FN_NAME _udwf>]() .call(vec![$($PARAM),+]) } } From 8e27029d09bc8d255a714729c33abffae8988551 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Mon, 30 Sep 2024 16:42:43 +0530 Subject: [PATCH 16/34] Minor: formatting --- datafusion/functions-window/src/macros.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index f3b4cf31e5e2..3cabb5ed0025 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -76,7 +76,6 @@ /// /// This creates `add_one_udwf()` from `AddOne`. /// get_or_init_udwf!(AddOne, add_one, "Adds one to each row value in window partition."); /// ``` - #[macro_export] macro_rules! get_or_init_udwf { ($UDWF:ident, $OUT_FN_NAME:ident, $DOC:expr) => { @@ -84,7 +83,6 @@ macro_rules! get_or_init_udwf { }; ($UDWF:ident, $OUT_FN_NAME:ident, $DOC:expr, $CTOR:path) => { - paste::paste! 
{ #[doc = concat!(" Singleton instance of [`", stringify!($OUT_FN_NAME), "`], ensures the user-defined")] #[doc = concat!(" window function is only created once.")] From e100bc6894ad70728b71f721fbf67effabc04be2 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Mon, 30 Sep 2024 17:58:08 +0530 Subject: [PATCH 17/34] Adds doc comment for `create_udwf_expr` macro --- datafusion/functions-window/src/macros.rs | 80 +++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index 3cabb5ed0025..fd4fa5b6ca2e 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -104,6 +104,86 @@ macro_rules! get_or_init_udwf { }; } +/// Create a [`WindowFunction`] expression that exposes a fluent API +/// which you can use to build more complex expressions and contains +/// additional [`ExprFunctionExt`] methods for configuring user-defined +/// window functions. +/// +/// [`WindowFunction`]: datafusion_expr::Expr::WindowFunction +/// [`ExprFunctionExt`]: datafusion_expr::expr_fn::ExprFunctionExt +/// +/// # Parameters +/// +/// * `$UDWF`: The struct which defines the [`Signature`] of the +/// user-defined window function. +/// * `$OUT_FN_NAME`: The basename to generate a unique function name like +/// `$OUT_FN_NAME_udwf`. +/// * `$DOC`: Description of user-defined window function. +/// * (optional) `[$($PARAM:ident),+]`: An array of 1 or more parameters +/// for the generated function. The type of parameters is [`Expr`]. +/// This is unnecessary for functions which take no arguments. +/// +/// [`Signature`]: datafusion_expr::Signature +/// [`Expr`]: datafusion_expr::Expr +/// +/// # Example +/// +/// 1. 
Function with zero parameters +/// ``` +/// use std::any::Any; +/// use datafusion_common::arrow::datatypes::{DataType, Field}; +/// use datafusion_expr::{PartitionEvaluator, Signature, Volatility, WindowUDFImpl}; +/// use datafusion_functions_window::{create_udwf_expr, get_or_init_udwf}; +/// use datafusion_functions_window_common::field::WindowUDFFieldArgs; +/// +/// #[derive(Debug)] +/// struct RowNumber { +/// signature: Signature, +/// } +/// +/// # get_or_init_udwf!( +/// # RowNumber, +/// # row_number, +/// # "Returns a unique row number for each row in window partition beginning at 1." +/// # ); +/// // Creates `row_number()` API which has no parameters +/// create_udwf_expr!( +/// RowNumber, +/// row_number, +/// "Returns a unique row number for each row in window partition beginning at 1." +/// ); +/// +/// # impl Default for RowNumber { +/// # fn default() -> Self { +/// # Self { +/// # signature: Signature::any(0, Volatility::Immutable), +/// # } +/// # } +/// # } +/// +/// # impl WindowUDFImpl for RowNumber { +/// # fn as_any(&self) -> &dyn Any { +/// # self +/// # } +/// # fn name(&self) -> &str { +/// # "row_number" +/// # } +/// # fn signature(&self) -> &Signature { +/// # &self.signature +/// # } +/// # fn partition_evaluator( +/// # &self, +/// # ) -> datafusion_common::Result> { +/// # unimplemented!() +/// # } +/// # fn field(&self, field_args: WindowUDFFieldArgs) -> datafusion_common::Result { +/// # Ok(Field::new(field_args.name(), DataType::UInt64, false)) +/// # } +/// # } +/// +/// ``` + +#[macro_export] macro_rules! 
create_udwf_expr { // zero arguments ($UDWF:ident, $OUT_FN_NAME:ident, $DOC:expr) => { From a5f6c2a1cd49e5897088d70e2fad5c6573475eda Mon Sep 17 00:00:00 2001 From: jcsherin Date: Mon, 30 Sep 2024 20:55:41 +0530 Subject: [PATCH 18/34] Improve example docs --- datafusion/functions-window/src/macros.rs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index fd4fa5b6ca2e..03c56c3b631c 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -130,13 +130,12 @@ macro_rules! get_or_init_udwf { /// /// 1. Function with zero parameters /// ``` -/// use std::any::Any; -/// use datafusion_common::arrow::datatypes::{DataType, Field}; -/// use datafusion_expr::{PartitionEvaluator, Signature, Volatility, WindowUDFImpl}; -/// use datafusion_functions_window::{create_udwf_expr, get_or_init_udwf}; -/// use datafusion_functions_window_common::field::WindowUDFFieldArgs; -/// -/// #[derive(Debug)] +/// # use std::any::Any; +/// # use datafusion_common::arrow::datatypes::{DataType, Field}; +/// # use datafusion_expr::{PartitionEvaluator, Signature, Volatility, WindowUDFImpl}; +/// # use datafusion_functions_window::{create_udwf_expr, get_or_init_udwf}; +/// # use datafusion_functions_window_common::field::WindowUDFFieldArgs; +/// # #[derive(Debug)] /// struct RowNumber { /// signature: Signature, /// } @@ -147,6 +146,11 @@ macro_rules! get_or_init_udwf { /// # "Returns a unique row number for each row in window partition beginning at 1." 
/// # ); /// // Creates `row_number()` API which has no parameters +/// // +/// // The macro expands into this: +/// // pub fn row_number() -> datafusion_expr::Expr { +/// // row_number_udwf().call(vec![]) +/// // } /// create_udwf_expr!( /// RowNumber, /// row_number, From fd70acbe023573e232ff6f0493b7d1422b302b27 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Mon, 30 Sep 2024 21:27:32 +0530 Subject: [PATCH 19/34] Hides extraneous code in doc comments --- datafusion/functions-window/src/macros.rs | 171 +++++++++++++++------- 1 file changed, 122 insertions(+), 49 deletions(-) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index 03c56c3b631c..8638d67125e2 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -33,48 +33,49 @@ /// # Example /// /// ``` -/// use std::any::Any; -/// use datafusion_common::arrow::datatypes::{DataType, Field}; -/// use datafusion_expr::{PartitionEvaluator, Signature, Volatility, WindowUDFImpl}; -/// -/// use datafusion_functions_window_common::field::WindowUDFFieldArgs; -/// use datafusion_functions_window::get_or_init_udwf; -/// -/// #[derive(Debug)] -/// struct AddOne { -/// signature: Signature, -/// } -/// -/// impl Default for AddOne { -/// fn default() -> Self { -/// Self { -/// signature: Signature::numeric(1, Volatility::Immutable), -/// } -/// } -/// } -/// -/// impl WindowUDFImpl for AddOne { -/// fn as_any(&self) -> &dyn Any { -/// self -/// } -/// fn name(&self) -> &str { -/// "add_one" -/// } -/// fn signature(&self) -> &Signature { -/// &self.signature -/// } -/// fn partition_evaluator( -/// &self, -/// ) -> datafusion_common::Result> { -/// unimplemented!("unnecessary for doc test") -/// } -/// fn field(&self, field_args: WindowUDFFieldArgs) -> datafusion_common::Result { -/// Ok(Field::new(field_args.name(), DataType::Int64, false)) -/// } -/// } -/// +/// # use std::any::Any; +/// # use 
datafusion_common::arrow::datatypes::{DataType, Field}; +/// # use datafusion_expr::{PartitionEvaluator, Signature, Volatility, WindowUDFImpl}; +/// # +/// # use datafusion_functions_window_common::field::WindowUDFFieldArgs; +/// # use datafusion_functions_window::get_or_init_udwf; +/// # /// /// This creates `add_one_udwf()` from `AddOne`. /// get_or_init_udwf!(AddOne, add_one, "Adds one to each row value in window partition."); +/// # +/// # #[derive(Debug)] +/// # struct AddOne { +/// # signature: Signature, +/// # } +/// # +/// # impl Default for AddOne { +/// # fn default() -> Self { +/// # Self { +/// # signature: Signature::numeric(1, Volatility::Immutable), +/// # } +/// # } +/// # } +/// # +/// # impl WindowUDFImpl for AddOne { +/// # fn as_any(&self) -> &dyn Any { +/// # self +/// # } +/// # fn name(&self) -> &str { +/// # "add_one" +/// # } +/// # fn signature(&self) -> &Signature { +/// # &self.signature +/// # } +/// # fn partition_evaluator( +/// # &self, +/// # ) -> datafusion_common::Result> { +/// # unimplemented!("unnecessary for doc test") +/// # } +/// # fn field(&self, field_args: WindowUDFFieldArgs) -> datafusion_common::Result { +/// # Ok(Field::new(field_args.name(), DataType::Int64, false)) +/// # } +/// # } +/// # /// ``` #[macro_export] macro_rules! get_or_init_udwf { @@ -128,18 +129,13 @@ macro_rules! get_or_init_udwf { /// /// # Example /// -/// 1. Function with zero parameters +/// 1. With zero parameters /// ``` /// # use std::any::Any; /// # use datafusion_common::arrow::datatypes::{DataType, Field}; /// # use datafusion_expr::{PartitionEvaluator, Signature, Volatility, WindowUDFImpl}; /// # use datafusion_functions_window::{create_udwf_expr, get_or_init_udwf}; /// # use datafusion_functions_window_common::field::WindowUDFFieldArgs; -/// # #[derive(Debug)] -/// struct RowNumber { -/// signature: Signature, -/// } -/// /// # get_or_init_udwf!( /// # RowNumber, /// # row_number, @@ -156,7 +152,10 @@ macro_rules! 
get_or_init_udwf { /// row_number, /// "Returns a unique row number for each row in window partition beginning at 1." /// ); -/// +/// # #[derive(Debug)] +/// # struct RowNumber { +/// # signature: Signature, +/// # } /// # impl Default for RowNumber { /// # fn default() -> Self { /// # Self { @@ -164,7 +163,6 @@ macro_rules! get_or_init_udwf { /// # } /// # } /// # } -/// /// # impl WindowUDFImpl for RowNumber { /// # fn as_any(&self) -> &dyn Any { /// # self @@ -184,9 +182,84 @@ macro_rules! get_or_init_udwf { /// # Ok(Field::new(field_args.name(), DataType::UInt64, false)) /// # } /// # } +/// ``` /// +/// 2. With at least 1 parameter +/// ``` +/// # use std::any::Any; +/// # +/// # use datafusion_expr::{ +/// # PartitionEvaluator, Signature, TypeSignature, Volatility, WindowUDFImpl, +/// # }; +/// # +/// # use datafusion_functions_window::{create_udwf_expr, get_or_init_udwf}; +/// # use datafusion_functions_window_common::field::WindowUDFFieldArgs; +/// # +/// # use datafusion_common::arrow::datatypes::Field; +/// # +/// # get_or_init_udwf!(Lead, lead, "user-defined window function"); +/// # +/// // Creates `lead(expr, offset, default)` with 3 parameters +/// // +/// // The macros expands into this: +/// // pub fn lead( +/// // expr: datafusion_expr::Expr, +/// // offset: datafusion_expr::Expr, +/// // default: datafusion_expr::Expr, +/// // ) -> datafusion_expr::Expr { +/// // lead_udwf().call(vec![expr, offset, default]) +/// // } +/// create_udwf_expr!( +/// Lead, +/// lead, +/// [expr, offset, default], +/// "user-defined window function" +/// ); +///# +///# #[derive(Debug)] +///# struct Lead { +///# signature: Signature, +///# } +///# +///# impl Default for Lead { +///# fn default() -> Self { +///# Self { +///# signature: Signature::one_of( +///# vec![ +///# TypeSignature::Any(1), +///# TypeSignature::Any(2), +///# TypeSignature::Any(3), +///# ], +///# Volatility::Immutable, +///# ), +///# } +///# } +///# } +///# +///# impl WindowUDFImpl for Lead { +///# 
fn as_any(&self) -> &dyn Any { +///# self +///# } +///# fn name(&self) -> &str { +///# "lead" +///# } +///# fn signature(&self) -> &Signature { +///# &self.signature +///# } +///# fn partition_evaluator( +///# &self, +///# ) -> datafusion_common::Result> { +///# unimplemented!() +///# } +///# fn field(&self, field_args: WindowUDFFieldArgs) -> datafusion_common::Result { +///# Ok(Field::new( +///# field_args.name(), +///# field_args.get_input_type(0).unwrap(), +///# false, +///# )) +///# } +///# } /// ``` - #[macro_export] macro_rules! create_udwf_expr { // zero arguments From 5138c61e5cd440c286e5202a733c628ea37b2497 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Mon, 30 Sep 2024 22:24:08 +0530 Subject: [PATCH 20/34] Add a one-line readme --- datafusion/functions-window/src/macros.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index 8638d67125e2..b4a97a685dd2 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -15,6 +15,12 @@ // specific language governing permissions and limitations // under the License. +//! Macros for generating [`WindowUDF`] and a [`WindowFunction`] +//! expression with a fluent API. +//! +//! [`WindowUDF`]: datafusion_expr::WindowUDF +//! [`WindowFunction`]: datafusion_expr::Expr::WindowFunction + /// Lazily initializes a user-defined window function exactly once /// when called concurrently. Repeated calls return a reference to the /// same instance. 
From 19cd99904b651302883838a5c2dea9a4e5ace131 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Mon, 30 Sep 2024 22:47:32 +0530 Subject: [PATCH 21/34] Adds doc test assertions + minor formatting fixes --- datafusion/functions-window/src/macros.rs | 105 +++++++++++++--------- 1 file changed, 61 insertions(+), 44 deletions(-) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index b4a97a685dd2..23cfa975e87f 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -49,6 +49,8 @@ /// /// This creates `add_one_udwf()` from `AddOne`. /// get_or_init_udwf!(AddOne, add_one, "Adds one to each row value in window partition."); /// # +/// # assert_eq!(add_one_udwf().name(), "add_one"); +/// # /// # #[derive(Debug)] /// # struct AddOne { /// # signature: Signature, @@ -158,6 +160,12 @@ macro_rules! get_or_init_udwf { /// row_number, /// "Returns a unique row number for each row in window partition beginning at 1." /// ); +/// # +/// # assert_eq!( +/// # row_number().name_for_alias().unwrap(), +/// # "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING" +/// # ); +/// # /// # #[derive(Debug)] /// # struct RowNumber { /// # signature: Signature, @@ -202,6 +210,8 @@ macro_rules! get_or_init_udwf { /// # use datafusion_functions_window_common::field::WindowUDFFieldArgs; /// # /// # use datafusion_common::arrow::datatypes::Field; +/// # use datafusion_common::ScalarValue; +/// # use datafusion_expr::{col, lit}; /// # /// # get_or_init_udwf!(Lead, lead, "user-defined window function"); /// # @@ -221,50 +231,57 @@ macro_rules! 
get_or_init_udwf { /// [expr, offset, default], /// "user-defined window function" /// ); -///# -///# #[derive(Debug)] -///# struct Lead { -///# signature: Signature, -///# } -///# -///# impl Default for Lead { -///# fn default() -> Self { -///# Self { -///# signature: Signature::one_of( -///# vec![ -///# TypeSignature::Any(1), -///# TypeSignature::Any(2), -///# TypeSignature::Any(3), -///# ], -///# Volatility::Immutable, -///# ), -///# } -///# } -///# } -///# -///# impl WindowUDFImpl for Lead { -///# fn as_any(&self) -> &dyn Any { -///# self -///# } -///# fn name(&self) -> &str { -///# "lead" -///# } -///# fn signature(&self) -> &Signature { -///# &self.signature -///# } -///# fn partition_evaluator( -///# &self, -///# ) -> datafusion_common::Result> { -///# unimplemented!() -///# } -///# fn field(&self, field_args: WindowUDFFieldArgs) -> datafusion_common::Result { -///# Ok(Field::new( -///# field_args.name(), -///# field_args.get_input_type(0).unwrap(), -///# false, -///# )) -///# } -///# } +/// # +/// # assert_eq!( +/// # lead(col("a"), lit(1i64), lit(ScalarValue::Null)) +/// # .name_for_alias() +/// # .unwrap(), +/// # "lead(a,Int64(1),NULL) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING" +/// # ); +/// # +/// # #[derive(Debug)] +/// # struct Lead { +/// # signature: Signature, +/// # } +/// # +/// # impl Default for Lead { +/// # fn default() -> Self { +/// # Self { +/// # signature: Signature::one_of( +/// # vec![ +/// # TypeSignature::Any(1), +/// # TypeSignature::Any(2), +/// # TypeSignature::Any(3), +/// # ], +/// # Volatility::Immutable, +/// # ), +/// # } +/// # } +/// # } +/// # +/// # impl WindowUDFImpl for Lead { +/// # fn as_any(&self) -> &dyn Any { +/// # self +/// # } +/// # fn name(&self) -> &str { +/// # "lead" +/// # } +/// # fn signature(&self) -> &Signature { +/// # &self.signature +/// # } +/// # fn partition_evaluator( +/// # &self, +/// # ) -> datafusion_common::Result> { +/// # unimplemented!() +/// # } +/// # fn field(&self, 
field_args: WindowUDFFieldArgs) -> datafusion_common::Result { +/// # Ok(Field::new( +/// # field_args.name(), +/// # field_args.get_input_type(0).unwrap(), +/// # false, +/// # )) +/// # } +/// # } /// ``` #[macro_export] macro_rules! create_udwf_expr { From 9dbb94c89185733c48df1117fa49fa23e355e268 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Tue, 1 Oct 2024 17:28:39 +0530 Subject: [PATCH 22/34] Adds common macro for defining user-defined window functions --- datafusion/functions-window/src/macros.rs | 27 +++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index 23cfa975e87f..d008aea8031c 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -314,3 +314,30 @@ macro_rules! create_udwf_expr { } }; } + +#[macro_export] +macro_rules! define_udwf_and_expr { + // default constructor, zero arguments + ($UDWF:ident, $OUT_FN_NAME:ident, $DOC:expr) => { + get_or_init_udwf!($UDWF, $OUT_FN_NAME, $DOC); + create_udwf_expr!($UDWF, $OUT_FN_NAME, $DOC); + }; + + // custom constructor, zero arguments + ($UDWF:ident, $OUT_FN_NAME:ident, $DOC:expr, $CTOR:path) => { + get_or_init_udwf!($UDWF, $OUT_FN_NAME, $DOC, $CTOR); + create_udwf_expr!($UDWF, $OUT_FN_NAME, $DOC); + }; + + // default constructor, 1 or more arguments + ($UDWF:ident, $OUT_FN_NAME:ident, [$($PARAM:ident),+], $DOC:expr) => { + get_or_init_udwf!($UDWF, $OUT_FN_NAME, $DOC); + create_udwf_expr!($UDWF, $OUT_FN_NAME, [$($PARAM:ident),+], $DOC); + }; + + // custom constructor, 1 or more arguments + ($UDWF:ident, $OUT_FN_NAME:ident, [$($PARAM:ident),+], $DOC:expr, $CTOR:path) => { + get_or_init_udwf!($UDWF, $OUT_FN_NAME, $DOC, $CTOR); + create_udwf_expr!($UDWF, $OUT_FN_NAME, [$($PARAM:ident),+], $DOC); + }; +} \ No newline at end of file From cadcf685b7d23cb5b2b04875b090df889eaae72e Mon Sep 17 00:00:00 2001 From: jcsherin Date: Tue, 1 Oct 2024 17:38:05 +0530 Subject: 
[PATCH 23/34] Adds doc comment for `define_udwf_and_expr` --- datafusion/functions-window/src/macros.rs | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index d008aea8031c..6b9830e2d64b 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -315,6 +315,27 @@ macro_rules! create_udwf_expr { }; } +/// Defines a user-defined window function. +/// +/// Combines [`get_or_init_udwf!`] and [`create_udwf_expr!`] into a +/// single macro for convenience. +/// +/// # Arguments +/// +/// * `$UDWF`: The struct which defines the [`Signature`] of the +/// user-defined window function. +/// * `$OUT_FN_NAME`: The basename to generate a unique function name like +/// `$OUT_FN_NAME_udwf`. +/// * (optional) `[$($PARAM:ident),+]`: An array of 1 or more parameters +/// for the generated function. The type of parameters is [`Expr`]. +/// This is unnecessary for functions which take no arguments. +/// * `$DOC`: Description of user-defined window function. +/// * (optional) `$CTOR`: When none provided it automatically resolves +/// to `$UDWF::default()` (default constructor). To customize +/// pass a different constructor. +/// +/// [`Signature`]: datafusion_expr::Signature +/// [`Expr`]: datafusion_expr::Expr #[macro_export] macro_rules! define_udwf_and_expr { // default constructor, zero arguments @@ -340,4 +361,4 @@ macro_rules! 
define_udwf_and_expr { get_or_init_udwf!($UDWF, $OUT_FN_NAME, $DOC, $CTOR); create_udwf_expr!($UDWF, $OUT_FN_NAME, [$($PARAM:ident),+], $DOC); }; -} \ No newline at end of file +} From cafdf56a3c4034e717bf7a733dd937a3b4931a5f Mon Sep 17 00:00:00 2001 From: jcsherin Date: Tue, 1 Oct 2024 17:44:43 +0530 Subject: [PATCH 24/34] Defines `RowNumber` using common macro --- datafusion/functions-window/src/row_number.rs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs index 1e87e8fee517..a2e1b2222bb7 100644 --- a/datafusion/functions-window/src/row_number.rs +++ b/datafusion/functions-window/src/row_number.rs @@ -31,13 +31,7 @@ use datafusion_expr::{PartitionEvaluator, Signature, Volatility, WindowUDFImpl}; use datafusion_functions_window_common::field; use field::WindowUDFFieldArgs; -get_or_init_udwf!( - RowNumber, - row_number, - "Returns a unique row number for each row in window partition beginning at 1." -); - -create_udwf_expr!( +define_udwf_and_expr!( RowNumber, row_number, "Returns a unique row number for each row in window partition beginning at 1." From 9ae5caaf7689108e6086369b62d5f48ca2b6c4e7 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Tue, 1 Oct 2024 17:55:55 +0530 Subject: [PATCH 25/34] Add usage example for common macro --- datafusion/functions-window/src/macros.rs | 56 +++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index 6b9830e2d64b..dfd2b9084384 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -336,6 +336,62 @@ macro_rules! create_udwf_expr { /// /// [`Signature`]: datafusion_expr::Signature /// [`Expr`]: datafusion_expr::Expr +/// +/// # Usage +/// +/// 1. Build a UDWF using its default constructor and create an +/// expression API which has zero parameters. 
+/// +/// ``` +/// # use std::any::Any; +/// # use datafusion_common::arrow::datatypes::{DataType, Field}; +/// # use datafusion_expr::{PartitionEvaluator, Signature, Volatility, WindowUDFImpl}; +/// # +/// # use datafusion_functions_window_common::field::WindowUDFFieldArgs; +/// # use datafusion_functions_window::{define_udwf_and_expr, get_or_init_udwf, create_udwf_expr}; +/// # +/// /// This creates: +/// /// 1. `add_one_udwf()`: user-defined window function and, +/// /// 2. `add_one()`: `WindowFunction` expression with zero parameters +/// /// +/// define_udwf_and_expr!(AddOne, add_one, "Adds one to each row value in window partition."); +/// # +/// # assert_eq!(add_one_udwf().name(), "add_one"); +/// # +/// # #[derive(Debug)] +/// # struct AddOne { +/// # signature: Signature, +/// # } +/// # +/// # impl Default for AddOne { +/// # fn default() -> Self { +/// # Self { +/// # signature: Signature::numeric(1, Volatility::Immutable), +/// # } +/// # } +/// # } +/// # +/// # impl WindowUDFImpl for AddOne { +/// # fn as_any(&self) -> &dyn Any { +/// # self +/// # } +/// # fn name(&self) -> &str { +/// # "add_one" +/// # } +/// # fn signature(&self) -> &Signature { +/// # &self.signature +/// # } +/// # fn partition_evaluator( +/// # &self, +/// # ) -> datafusion_common::Result> { +/// # unimplemented!("unnecessary for doc test") +/// # } +/// # fn field(&self, field_args: WindowUDFFieldArgs) -> datafusion_common::Result { +/// # Ok(Field::new(field_args.name(), DataType::Int64, false)) +/// # } +/// # } +/// # +/// ``` #[macro_export] macro_rules! 
define_udwf_and_expr { // default constructor, zero arguments From 3c8589c93660d3b33c8dc035ba4d0d461d2bac07 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Tue, 1 Oct 2024 18:10:23 +0530 Subject: [PATCH 26/34] Adds usage for custom constructor --- datafusion/functions-window/src/macros.rs | 65 +++++++++++++++++++++-- 1 file changed, 62 insertions(+), 3 deletions(-) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index dfd2b9084384..eb0075db5704 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -339,8 +339,7 @@ macro_rules! create_udwf_expr { /// /// # Usage /// -/// 1. Build a UDWF using its default constructor and create an -/// expression API which has zero parameters. +/// 1. Uses default constructor for UDWF + Zero parameters in expression API. /// /// ``` /// # use std::any::Any; @@ -354,7 +353,11 @@ macro_rules! create_udwf_expr { /// /// 1. `add_one_udwf()`: user-defined window function and, /// /// 2. `add_one()`: `WindowFunction` expression with zero parameters /// /// -/// define_udwf_and_expr!(AddOne, add_one, "Adds one to each row value in window partition."); +/// define_udwf_and_expr!( +/// AddOne, +/// add_one, +/// "Adds one to each row value in window partition." +/// ); /// # /// # assert_eq!(add_one_udwf().name(), "add_one"); /// # @@ -392,6 +395,62 @@ macro_rules! create_udwf_expr { /// # } /// # /// ``` +/// 2. Uses a custom constructor for UDWF + Zero parameters in expression API. +/// +/// ``` +/// # use std::any::Any; +/// # use datafusion_common::arrow::datatypes::{DataType, Field}; +/// # use datafusion_expr::{PartitionEvaluator, Signature, Volatility, WindowUDFImpl}; +/// # use datafusion_functions_window::{create_udwf_expr, define_udwf_and_expr, get_or_init_udwf}; +/// # use datafusion_functions_window_common::field::WindowUDFFieldArgs; +/// # +/// /// This creates: +/// /// 1. 
`row_number_udwf()`: user-defined window function and, +/// /// 2. `row_number()`: `WindowFunction` expression with zero parameters +/// /// +/// define_udwf_and_expr!( +/// RowNumber, +/// row_number, +/// "Returns a unique row number for each row in window partition beginning at 1.", +/// RowNumber::new // <-- custom constructor +/// ); +/// # +/// # assert_eq!( +/// # row_number().name_for_alias().unwrap(), +/// # "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING" +/// # ); +/// # +/// # #[derive(Debug)] +/// # struct RowNumber { +/// # signature: Signature, +/// # } +/// # impl RowNumber { +/// # fn new() -> Self { +/// # Self { +/// # signature: Signature::any(0, Volatility::Immutable), +/// # } +/// # } +/// # } +/// # impl WindowUDFImpl for RowNumber { +/// # fn as_any(&self) -> &dyn Any { +/// # self +/// # } +/// # fn name(&self) -> &str { +/// # "row_number" +/// # } +/// # fn signature(&self) -> &Signature { +/// # &self.signature +/// # } +/// # fn partition_evaluator( +/// # &self, +/// # ) -> datafusion_common::Result> { +/// # unimplemented!() +/// # } +/// # fn field(&self, field_args: WindowUDFFieldArgs) -> datafusion_common::Result { +/// # Ok(Field::new(field_args.name(), DataType::UInt64, false)) +/// # } +/// # } +/// ``` #[macro_export] macro_rules! define_udwf_and_expr { // default constructor, zero arguments From e6b41919027e5f65f904d1741ff79387bee3e891 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Tue, 1 Oct 2024 19:16:13 +0530 Subject: [PATCH 27/34] Add examples for remaining patterns --- datafusion/functions-window/src/macros.rs | 181 +++++++++++++++++++++- 1 file changed, 177 insertions(+), 4 deletions(-) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index eb0075db5704..06ebfbec0b89 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -339,7 +339,8 @@ macro_rules! create_udwf_expr { /// /// # Usage /// -/// 1. 
Uses default constructor for UDWF + Zero parameters in expression API. +/// ## Expression API With Zero parameters +/// 1. Uses default constructor for UDWF. /// /// ``` /// # use std::any::Any; @@ -395,7 +396,8 @@ macro_rules! create_udwf_expr { /// # } /// # /// ``` -/// 2. Uses a custom constructor for UDWF + Zero parameters in expression API. +/// +/// 2. Uses a custom constructor for UDWF. /// /// ``` /// # use std::any::Any; @@ -451,6 +453,177 @@ macro_rules! create_udwf_expr { /// # } /// # } /// ``` +/// +/// ## Expression API With Parameters +/// 3. Uses default constructor for UDWF +/// +/// ``` +/// # use std::any::Any; +/// # +/// # use datafusion_expr::{ +/// # PartitionEvaluator, Signature, TypeSignature, Volatility, WindowUDFImpl, +/// # }; +/// # +/// # use datafusion_functions_window::{create_udwf_expr, define_udwf_and_expr, get_or_init_udwf}; +/// # use datafusion_functions_window_common::field::WindowUDFFieldArgs; +/// # +/// # use datafusion_common::arrow::datatypes::Field; +/// # use datafusion_common::ScalarValue; +/// # use datafusion_expr::{col, lit}; +/// # +/// // Creates `lead(expr, offset, default)` with 3 parameters +/// // +/// // The macros expands into this: +/// // pub fn lead( +/// // expr: datafusion_expr::Expr, +/// // offset: datafusion_expr::Expr, +/// // default: datafusion_expr::Expr, +/// // ) -> datafusion_expr::Expr { +/// // lead_udwf().call(vec![expr, offset, default]) +/// // } +/// define_udwf_and_expr!( +/// Lead, +/// lead, +/// [expr, offset, default], +/// "user-defined window function" +/// ); +/// # +/// # assert_eq!( +/// # lead(col("a"), lit(1i64), lit(ScalarValue::Null)) +/// # .name_for_alias() +/// # .unwrap(), +/// # "lead(a,Int64(1),NULL) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING" +/// # ); +/// # +/// # #[derive(Debug)] +/// # struct Lead { +/// # signature: Signature, +/// # } +/// # +/// # impl Default for Lead { +/// # fn default() -> Self { +/// # Self { +/// # signature: 
Signature::one_of( +/// # vec![ +/// # TypeSignature::Any(1), +/// # TypeSignature::Any(2), +/// # TypeSignature::Any(3), +/// # ], +/// # Volatility::Immutable, +/// # ), +/// # } +/// # } +/// # } +/// # +/// # impl WindowUDFImpl for Lead { +/// # fn as_any(&self) -> &dyn Any { +/// # self +/// # } +/// # fn name(&self) -> &str { +/// # "lead" +/// # } +/// # fn signature(&self) -> &Signature { +/// # &self.signature +/// # } +/// # fn partition_evaluator( +/// # &self, +/// # ) -> datafusion_common::Result> { +/// # unimplemented!() +/// # } +/// # fn field(&self, field_args: WindowUDFFieldArgs) -> datafusion_common::Result { +/// # Ok(Field::new( +/// # field_args.name(), +/// # field_args.get_input_type(0).unwrap(), +/// # false, +/// # )) +/// # } +/// # } +/// ``` +/// 4. Uses custom constructor for UDWF +/// +/// ``` +/// # use std::any::Any; +/// # +/// # use datafusion_expr::{ +/// # PartitionEvaluator, Signature, TypeSignature, Volatility, WindowUDFImpl, +/// # }; +/// # +/// # use datafusion_functions_window::{create_udwf_expr, define_udwf_and_expr, get_or_init_udwf}; +/// # use datafusion_functions_window_common::field::WindowUDFFieldArgs; +/// # +/// # use datafusion_common::arrow::datatypes::Field; +/// # use datafusion_common::ScalarValue; +/// # use datafusion_expr::{col, lit}; +/// # +/// // Creates `lead(expr, offset, default)` with 3 parameters +/// // +/// // The macros expands into this: +/// // pub fn lead( +/// // expr: datafusion_expr::Expr, +/// // offset: datafusion_expr::Expr, +/// // default: datafusion_expr::Expr, +/// // ) -> datafusion_expr::Expr { +/// // lead_udwf().call(vec![expr, offset, default]) +/// // } +/// define_udwf_and_expr!( +/// Lead, +/// lead, +/// [expr, offset, default], +/// "user-defined window function", +/// Lead::new +/// ); +/// # +/// # assert_eq!( +/// # lead(col("a"), lit(1i64), lit(ScalarValue::Null)) +/// # .name_for_alias() +/// # .unwrap(), +/// # "lead(a,Int64(1),NULL) ROWS BETWEEN UNBOUNDED PRECEDING 
AND UNBOUNDED FOLLOWING" +/// # ); +/// # +/// # #[derive(Debug)] +/// # struct Lead { +/// # signature: Signature, +/// # } +/// # +/// # impl Lead { +/// # fn new() -> Self { +/// # Self { +/// # signature: Signature::one_of( +/// # vec![ +/// # TypeSignature::Any(1), +/// # TypeSignature::Any(2), +/// # TypeSignature::Any(3), +/// # ], +/// # Volatility::Immutable, +/// # ), +/// # } +/// # } +/// # } +/// # +/// # impl WindowUDFImpl for Lead { +/// # fn as_any(&self) -> &dyn Any { +/// # self +/// # } +/// # fn name(&self) -> &str { +/// # "lead" +/// # } +/// # fn signature(&self) -> &Signature { +/// # &self.signature +/// # } +/// # fn partition_evaluator( +/// # &self, +/// # ) -> datafusion_common::Result> { +/// # unimplemented!() +/// # } +/// # fn field(&self, field_args: WindowUDFFieldArgs) -> datafusion_common::Result { +/// # Ok(Field::new( +/// # field_args.name(), +/// # field_args.get_input_type(0).unwrap(), +/// # false, +/// # )) +/// # } +/// # } +/// ``` #[macro_export] macro_rules! define_udwf_and_expr { // default constructor, zero arguments @@ -468,12 +641,12 @@ macro_rules! 
define_udwf_and_expr { // default constructor, 1 or more arguments ($UDWF:ident, $OUT_FN_NAME:ident, [$($PARAM:ident),+], $DOC:expr) => { get_or_init_udwf!($UDWF, $OUT_FN_NAME, $DOC); - create_udwf_expr!($UDWF, $OUT_FN_NAME, [$($PARAM:ident),+], $DOC); + create_udwf_expr!($UDWF, $OUT_FN_NAME, [$($PARAM),+], $DOC); }; // custom constructor, 1 or more arguments ($UDWF:ident, $OUT_FN_NAME:ident, [$($PARAM:ident),+], $DOC:expr, $CTOR:path) => { get_or_init_udwf!($UDWF, $OUT_FN_NAME, $DOC, $CTOR); - create_udwf_expr!($UDWF, $OUT_FN_NAME, [$($PARAM:ident),+], $DOC); + create_udwf_expr!($UDWF, $OUT_FN_NAME, [$($PARAM),+], $DOC); }; } From ff24d6a98514cbcbb647536e0d742a78ed95a839 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Tue, 1 Oct 2024 19:33:50 +0530 Subject: [PATCH 28/34] Improve doc comments for usage examples --- datafusion/functions-window/src/macros.rs | 72 +++++++++++++---------- 1 file changed, 42 insertions(+), 30 deletions(-) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index 06ebfbec0b89..9c6172c0f995 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -350,10 +350,14 @@ macro_rules! create_udwf_expr { /// # use datafusion_functions_window_common::field::WindowUDFFieldArgs; /// # use datafusion_functions_window::{define_udwf_and_expr, get_or_init_udwf, create_udwf_expr}; /// # -/// /// This creates: -/// /// 1. `add_one_udwf()`: user-defined window function and, -/// /// 2. `add_one()`: `WindowFunction` expression with zero parameters +/// /// 1. Defines the `add_one_udwf()` user-defined window function. /// /// +/// /// 2. Defines the expression API: +/// /// ``` +/// /// pub fn add_one() -> datafusion_expr::Expr { +/// /// add_one_udwf().call(vec![]) +/// /// } +/// /// ``` /// define_udwf_and_expr!( /// AddOne, /// add_one, @@ -406,10 +410,14 @@ macro_rules! 
create_udwf_expr { /// # use datafusion_functions_window::{create_udwf_expr, define_udwf_and_expr, get_or_init_udwf}; /// # use datafusion_functions_window_common::field::WindowUDFFieldArgs; /// # -/// /// This creates: -/// /// 1. `row_number_udwf()`: user-defined window function and, -/// /// 2. `row_number()`: `WindowFunction` expression with zero parameters +/// /// 1. Defines the `row_number_udwf()` user-defined window function. /// /// +/// /// 2. Defines the expression API: +/// /// ``` +/// /// pub fn row_number() -> datafusion_expr::Expr { +/// /// row_number_udwf().call(vec![]) +/// /// } +/// /// ``` /// define_udwf_and_expr!( /// RowNumber, /// row_number, @@ -454,7 +462,7 @@ macro_rules! create_udwf_expr { /// # } /// ``` /// -/// ## Expression API With Parameters +/// ## Expression API With Multiple Parameters /// 3. Uses default constructor for UDWF /// /// ``` @@ -471,20 +479,22 @@ macro_rules! create_udwf_expr { /// # use datafusion_common::ScalarValue; /// # use datafusion_expr::{col, lit}; /// # -/// // Creates `lead(expr, offset, default)` with 3 parameters -/// // -/// // The macros expands into this: -/// // pub fn lead( -/// // expr: datafusion_expr::Expr, -/// // offset: datafusion_expr::Expr, -/// // default: datafusion_expr::Expr, -/// // ) -> datafusion_expr::Expr { -/// // lead_udwf().call(vec![expr, offset, default]) -/// // } +/// /// 1. Defines the `lead_udwf()` user-defined window function. +/// /// +/// /// 2. Defines the expression API: +/// /// ``` +/// /// pub fn lead( +/// /// expr: datafusion_expr::Expr, +/// /// offset: datafusion_expr::Expr, +/// /// default: datafusion_expr::Expr, +/// /// ) -> datafusion_expr::Expr { +/// /// lead_udwf().call(vec![expr, offset, default]) +/// /// } +/// /// ``` /// define_udwf_and_expr!( /// Lead, /// lead, -/// [expr, offset, default], +/// [expr, offset, default], // <- 3 parameters /// "user-defined window function" /// ); /// # @@ -555,22 +565,24 @@ macro_rules! 
create_udwf_expr { /// # use datafusion_common::ScalarValue; /// # use datafusion_expr::{col, lit}; /// # -/// // Creates `lead(expr, offset, default)` with 3 parameters -/// // -/// // The macros expands into this: -/// // pub fn lead( -/// // expr: datafusion_expr::Expr, -/// // offset: datafusion_expr::Expr, -/// // default: datafusion_expr::Expr, -/// // ) -> datafusion_expr::Expr { -/// // lead_udwf().call(vec![expr, offset, default]) -/// // } +/// /// 1. Defines the `lead_udwf()` user-defined window function. +/// /// +/// /// 2. Defines the expression API: +/// /// ``` +/// /// pub fn lead( +/// /// expr: datafusion_expr::Expr, +/// /// offset: datafusion_expr::Expr, +/// /// default: datafusion_expr::Expr, +/// /// ) -> datafusion_expr::Expr { +/// /// lead_udwf().call(vec![expr, offset, default]) +/// /// } +/// /// ``` /// define_udwf_and_expr!( /// Lead, /// lead, -/// [expr, offset, default], +/// [expr, offset, default], // <- 3 parameters /// "user-defined window function", -/// Lead::new +/// Lead::new // <- Custom constructor /// ); /// # /// # assert_eq!( From 09709d3d68820f0e12b48c4c566c59fe02148e07 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Tue, 1 Oct 2024 19:43:51 +0530 Subject: [PATCH 29/34] Rewrite inner line docs --- datafusion/functions-window/src/macros.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index 9c6172c0f995..b4e0af54f72e 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -15,11 +15,12 @@ // specific language governing permissions and limitations // under the License. -//! Macros for generating [`WindowUDF`] and a [`WindowFunction`] -//! expression with a fluent API. +//! Convenience macros for defining a user-defined window function +//! and associated expression API (fluent style). //! -//! [`WindowUDF`]: datafusion_expr::WindowUDF -//! 
[`WindowFunction`]: datafusion_expr::Expr::WindowFunction +//! See [`define_udwf_and_expr!`] for usage examples. +//! +//! [`define_udwf_and_expr!`]: crate::define_udwf_and_expr! /// Lazily initializes a user-defined window function exactly once /// when called concurrently. Repeated calls return a reference to the From 03f8a72232d0bd3818e941d9eaffe566c83fbb9a Mon Sep 17 00:00:00 2001 From: jcsherin Date: Tue, 1 Oct 2024 20:00:14 +0530 Subject: [PATCH 30/34] Rewrite `create_udwf_expr!` doc comments --- datafusion/functions-window/src/macros.rs | 57 ++++++++++++----------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index b4e0af54f72e..1172c91fd879 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -32,7 +32,7 @@ /// of the user-defined window function. /// * `$OUT_FN_NAME`: The basename to generate a unique function name like /// `$OUT_FN_NAME_udwf`. -/// * `$DOC`: Description of user-defined window function. +/// * `$DOC`: Doc comments for UDWF. /// * (optional) `$CTOR`: When none provided it automatically resolves /// to `$UDWF::default()` (default constructor). To customize /// pass a different constructor. @@ -115,12 +115,9 @@ macro_rules! get_or_init_udwf { } /// Create a [`WindowFunction`] expression that exposes a fluent API -/// which you can use to build more complex expressions and contains -/// additional [`ExprFunctionExt`] methods for configuring user-defined -/// window functions. +/// which you can use to build more complex expressions. /// /// [`WindowFunction`]: datafusion_expr::Expr::WindowFunction -/// [`ExprFunctionExt`]: datafusion_expr::expr_fn::ExprFunctionExt /// /// # Parameters /// @@ -128,17 +125,17 @@ macro_rules! get_or_init_udwf { /// user-defined window function. /// * `$OUT_FN_NAME`: The basename to generate a unique function name like /// `$OUT_FN_NAME_udwf`. 
-/// * `$DOC`: Description of user-defined window function. +/// * `$DOC`: Doc comments for UDWF. /// * (optional) `[$($PARAM:ident),+]`: An array of 1 or more parameters /// for the generated function. The type of parameters is [`Expr`]. -/// This is unnecessary for functions which take no arguments. +/// This is omitted for functions with zero parameters. /// /// [`Signature`]: datafusion_expr::Signature /// [`Expr`]: datafusion_expr::Expr /// /// # Example /// -/// 1. With zero parameters +/// 1. With Zero Parameters /// ``` /// # use std::any::Any; /// # use datafusion_common::arrow::datatypes::{DataType, Field}; @@ -150,12 +147,15 @@ macro_rules! get_or_init_udwf { /// # row_number, /// # "Returns a unique row number for each row in window partition beginning at 1." /// # ); -/// // Creates `row_number()` API which has no parameters -/// // -/// // The macro expands into this: -/// // pub fn row_number() -> datafusion_expr::Expr { -/// // row_number_udwf().call(vec![]) -/// // } +/// /// Creates `row_number()` API which has zero parameters: +/// /// +/// /// ``` +/// /// /// Returns a unique row number for each row in window partition +/// /// /// beginning at 1. +/// /// pub fn row_number() -> datafusion_expr::Expr { +/// /// row_number_udwf().call(vec![]) +/// /// } +/// /// ``` /// create_udwf_expr!( /// RowNumber, /// row_number, @@ -199,7 +199,7 @@ macro_rules! get_or_init_udwf { /// # } /// ``` /// -/// 2. With at least 1 parameter +/// 2. With Multiple Parameters /// ``` /// # use std::any::Any; /// # @@ -216,21 +216,24 @@ macro_rules! 
get_or_init_udwf { /// # /// # get_or_init_udwf!(Lead, lead, "user-defined window function"); /// # -/// // Creates `lead(expr, offset, default)` with 3 parameters -/// // -/// // The macros expands into this: -/// // pub fn lead( -/// // expr: datafusion_expr::Expr, -/// // offset: datafusion_expr::Expr, -/// // default: datafusion_expr::Expr, -/// // ) -> datafusion_expr::Expr { -/// // lead_udwf().call(vec![expr, offset, default]) -/// // } +/// /// Creates `lead(expr, offset, default)` with 3 parameters: +/// /// +/// /// ``` +/// /// /// Returns a value evaluated at the row that is offset rows +/// /// /// after the current row within the partition. +/// /// pub fn lead( +/// /// expr: datafusion_expr::Expr, +/// /// offset: datafusion_expr::Expr, +/// /// default: datafusion_expr::Expr, +/// /// ) -> datafusion_expr::Expr { +/// /// lead_udwf().call(vec![expr, offset, default]) +/// /// } +/// /// ``` /// create_udwf_expr!( /// Lead, /// lead, /// [expr, offset, default], -/// "user-defined window function" +/// "Returns a value evaluated at the row that is offset rows after the current row within the partition." /// ); /// # /// # assert_eq!( @@ -330,7 +333,7 @@ macro_rules! create_udwf_expr { /// * (optional) `[$($PARAM:ident),+]`: An array of 1 or more parameters /// for the generated function. The type of parameters is [`Expr`]. /// This is unnecessary for functions which take no arguments. -/// * `$DOC`: Description of user-defined window function. +/// * `$DOC`: Doc comments for UDWF. /// * (optional) `$CTOR`: When none provided it automatically resolves /// to `$UDWF::default()` (default constructor). To customize /// pass a different constructor. 
From 60d5eccd789153656b2e42860f5b2312b7e04411 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Tue, 1 Oct 2024 20:56:08 +0530 Subject: [PATCH 31/34] Minor doc improvements --- datafusion/functions-window/src/macros.rs | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index 1172c91fd879..db4b76a88c79 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -33,9 +33,8 @@ /// * `$OUT_FN_NAME`: The basename to generate a unique function name like /// `$OUT_FN_NAME_udwf`. /// * `$DOC`: Doc comments for UDWF. -/// * (optional) `$CTOR`: When none provided it automatically resolves -/// to `$UDWF::default()` (default constructor). To customize -/// pass a different constructor. +/// * (optional) `$CTOR`: Pass a custom constructor. When omitted it +/// automatically resolves to `$UDWF::default()`. /// /// # Example /// @@ -47,8 +46,12 @@ /// # use datafusion_functions_window_common::field::WindowUDFFieldArgs; /// # use datafusion_functions_window::get_or_init_udwf; /// # -/// /// This creates `add_one_udwf()` from `AddOne`. -/// get_or_init_udwf!(AddOne, add_one, "Adds one to each row value in window partition."); +/// /// Defines the `add_one_udwf()` user-defined window function. +/// get_or_init_udwf!( +/// AddOne, +/// add_one, +/// "Adds one to each row value in window partition." +/// ); /// # /// # assert_eq!(add_one_udwf().name(), "add_one"); /// # @@ -128,7 +131,7 @@ macro_rules! get_or_init_udwf { /// * `$DOC`: Doc comments for UDWF. /// * (optional) `[$($PARAM:ident),+]`: An array of 1 or more parameters /// for the generated function. The type of parameters is [`Expr`]. -/// This is omitted for functions with zero parameters. +/// When omitted this creates a function with zero parameters. 
/// /// [`Signature`]: datafusion_expr::Signature /// [`Expr`]: datafusion_expr::Expr @@ -332,11 +335,10 @@ macro_rules! create_udwf_expr { /// `$OUT_FN_NAME_udwf`. /// * (optional) `[$($PARAM:ident),+]`: An array of 1 or more parameters /// for the generated function. The type of parameters is [`Expr`]. -/// This is unnecessary for functions which take no arguments. +/// When omitted this creates a function with zero parameters. /// * `$DOC`: Doc comments for UDWF. -/// * (optional) `$CTOR`: When none provided it automatically resolves -/// to `$UDWF::default()` (default constructor). To customize -/// pass a different constructor. +/// * (optional) `$CTOR`: Pass a custom constructor. When omitted it +/// automatically resolves to `$UDWF::default()`. /// /// [`Signature`]: datafusion_expr::Signature /// [`Expr`]: datafusion_expr::Expr From 12197b5a41d30264aed6feb3a134303444cd614c Mon Sep 17 00:00:00 2001 From: jcsherin Date: Tue, 1 Oct 2024 21:04:56 +0530 Subject: [PATCH 32/34] Fix doc test and usage example --- datafusion/functions-window/src/macros.rs | 46 +++++++++++------------ 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index db4b76a88c79..0bba8c243fef 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -46,34 +46,34 @@ /// # use datafusion_functions_window_common::field::WindowUDFFieldArgs; /// # use datafusion_functions_window::get_or_init_udwf; /// # -/// /// Defines the `add_one_udwf()` user-defined window function. +/// /// Defines the `simple_udwf()` user-defined window function. /// get_or_init_udwf!( -/// AddOne, -/// add_one, -/// "Adds one to each row value in window partition." +/// SimpleUDWF, +/// simple, +/// "Simple user-defined window function doc comment." 
/// ); /// # -/// # assert_eq!(add_one_udwf().name(), "add_one"); +/// # assert_eq!(simple_udwf().name(), "simple_user_defined_window_function"); /// # /// # #[derive(Debug)] -/// # struct AddOne { +/// # struct SimpleUDWF { /// # signature: Signature, /// # } /// # -/// # impl Default for AddOne { +/// # impl Default for SimpleUDWF { /// # fn default() -> Self { /// # Self { -/// # signature: Signature::numeric(1, Volatility::Immutable), +/// # signature: Signature::any(0, Volatility::Immutable), /// # } /// # } /// # } /// # -/// # impl WindowUDFImpl for AddOne { +/// # impl WindowUDFImpl for SimpleUDWF { /// # fn as_any(&self) -> &dyn Any { /// # self /// # } /// # fn name(&self) -> &str { -/// # "add_one" +/// # "simple_user_defined_window_function" /// # } /// # fn signature(&self) -> &Signature { /// # &self.signature @@ -81,7 +81,7 @@ /// # fn partition_evaluator( /// # &self, /// # ) -> datafusion_common::Result> { -/// # unimplemented!("unnecessary for doc test") +/// # unimplemented!() /// # } /// # fn field(&self, field_args: WindowUDFFieldArgs) -> datafusion_common::Result { /// # Ok(Field::new(field_args.name(), DataType::Int64, false)) @@ -356,41 +356,41 @@ macro_rules! create_udwf_expr { /// # use datafusion_functions_window_common::field::WindowUDFFieldArgs; /// # use datafusion_functions_window::{define_udwf_and_expr, get_or_init_udwf, create_udwf_expr}; /// # -/// /// 1. Defines the `add_one_udwf()` user-defined window function. +/// /// 1. Defines the `simple_udwf()` user-defined window function. /// /// /// /// 2. Defines the expression API: /// /// ``` -/// /// pub fn add_one() -> datafusion_expr::Expr { -/// /// add_one_udwf().call(vec![]) +/// /// pub fn simple() -> datafusion_expr::Expr { +/// /// simple_udwf().call(vec![]) /// /// } /// /// ``` /// define_udwf_and_expr!( -/// AddOne, -/// add_one, +/// SimpleUDWF, +/// simple, /// "Adds one to each row value in window partition." 
/// ); /// # -/// # assert_eq!(add_one_udwf().name(), "add_one"); +/// # assert_eq!(simple_udwf().name(), "simple_user_defined_window_function"); /// # /// # #[derive(Debug)] -/// # struct AddOne { +/// # struct SimpleUDWF { /// # signature: Signature, /// # } /// # -/// # impl Default for AddOne { +/// # impl Default for SimpleUDWF { /// # fn default() -> Self { /// # Self { -/// # signature: Signature::numeric(1, Volatility::Immutable), +/// # signature: Signature::any(0, Volatility::Immutable), /// # } /// # } /// # } /// # -/// # impl WindowUDFImpl for AddOne { +/// # impl WindowUDFImpl for SimpleUDWF { /// # fn as_any(&self) -> &dyn Any { /// # self /// # } /// # fn name(&self) -> &str { -/// # "add_one" +/// # "simple_user_defined_window_function" /// # } /// # fn signature(&self) -> &Signature { /// # &self.signature @@ -398,7 +398,7 @@ macro_rules! create_udwf_expr { /// # fn partition_evaluator( /// # &self, /// # ) -> datafusion_common::Result> { -/// # unimplemented!("unnecessary for doc test") +/// # unimplemented!() /// # } /// # fn field(&self, field_args: WindowUDFFieldArgs) -> datafusion_common::Result { /// # Ok(Field::new(field_args.name(), DataType::Int64, false)) From 4e6ee30030951dec8f42c3e87b98593f4b9aa483 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Tue, 1 Oct 2024 22:15:42 +0530 Subject: [PATCH 33/34] Add inline comments for macro patterns --- datafusion/functions-window/src/macros.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index 0bba8c243fef..fcc46fe44608 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -644,25 +644,29 @@ macro_rules! create_udwf_expr { /// ``` #[macro_export] macro_rules! 
define_udwf_and_expr { - // default constructor, zero arguments + // Defines UDWF with default constructor + // Defines expression API with zero parameters ($UDWF:ident, $OUT_FN_NAME:ident, $DOC:expr) => { get_or_init_udwf!($UDWF, $OUT_FN_NAME, $DOC); create_udwf_expr!($UDWF, $OUT_FN_NAME, $DOC); }; - // custom constructor, zero arguments + // Defines UDWF by passing a custom constructor + // Defines expression API with zero parameters ($UDWF:ident, $OUT_FN_NAME:ident, $DOC:expr, $CTOR:path) => { get_or_init_udwf!($UDWF, $OUT_FN_NAME, $DOC, $CTOR); create_udwf_expr!($UDWF, $OUT_FN_NAME, $DOC); }; - // default constructor, 1 or more arguments + // Defines UDWF with default constructor + // Defines expression API with multiple parameters ($UDWF:ident, $OUT_FN_NAME:ident, [$($PARAM:ident),+], $DOC:expr) => { get_or_init_udwf!($UDWF, $OUT_FN_NAME, $DOC); create_udwf_expr!($UDWF, $OUT_FN_NAME, [$($PARAM),+], $DOC); }; - // custom constructor, 1 or more arguments + // Defines UDWF by passing a custom constructor + // Defines expression API with multiple parameters ($UDWF:ident, $OUT_FN_NAME:ident, [$($PARAM:ident),+], $DOC:expr, $CTOR:path) => { get_or_init_udwf!($UDWF, $OUT_FN_NAME, $DOC, $CTOR); create_udwf_expr!($UDWF, $OUT_FN_NAME, [$($PARAM),+], $DOC); From 35e2f1a02b6a7f1b711152a9c680da17d9249256 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Wed, 2 Oct 2024 17:19:02 +0530 Subject: [PATCH 34/34] Minor: change doc comment in example --- datafusion/functions-window/src/macros.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index fcc46fe44608..843d8ecb38cc 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -367,7 +367,7 @@ macro_rules! create_udwf_expr { /// define_udwf_and_expr!( /// SimpleUDWF, /// simple, -/// "Adds one to each row value in window partition." 
+/// "a simple user-defined window function" /// ); /// # /// # assert_eq!(simple_udwf().name(), "simple_user_defined_window_function");