From 3c662010f9d133cda65749b119a1b1731edbe4e5 Mon Sep 17 00:00:00 2001 From: Michael J Ward Date: Sun, 10 Nov 2024 11:02:01 -0600 Subject: [PATCH] Upgrade to Datafusion 43 (#905) * patch datafusion deps * migrate from deprecated RuntimeEnv::new to RuntimeEnv::try_new Ref: https://github.com/apache/datafusion/pull/12566 * remove Arc from create_udf call Ref: https://github.com/apache/datafusion/pull/12489 * doc typo * migrage new UnnestOptions API Ref: https://github.com/apache/datafusion/pull/12836/files * update API for logical expr Limit Ref: https://github.com/apache/datafusion/pull/12836 * remove logical expr CrossJoin It was removed upstream. Ref: https://github.com/apache/datafusion/pull/13076 * update PyWindowUDF Ref: https://github.com/apache/datafusion/issues/12803 * migrate window functions lead and lag to udwf Ref: https://github.com/apache/datafusion/issues/12802 * migrate window functions rank, dense_rank, and percent_rank to udwf Ref: https://github.com/apache/datafusion/issues/12648 * convert window function cume_dist to udwf Ref: https://github.com/apache/datafusion/issues/12695 * convert window function ntile to udwf Ref: https://github.com/apache/datafusion/issues/12694 * clean up functions_window invocation * Only one column was being passed to udwf * Update to DF 43.0.0 * Update tests to look for string_view type * String view is now the default type for strings * Making a variety of adjustments in wrappers and unit tests to account for the switch from string to string_view as default * Resolve errors in doc building --------- Co-authored-by: Tim Saucer --- Cargo.lock | 373 ++++++++++++++++++--------------- Cargo.toml | 9 +- examples/tpch/_tests.py | 4 +- python/datafusion/expr.py | 4 +- python/datafusion/functions.py | 11 +- python/datafusion/udf.py | 1 + python/tests/test_expr.py | 16 +- python/tests/test_functions.py | 67 ++++-- python/tests/test_imports.py | 2 - python/tests/test_sql.py | 7 + src/context.rs | 2 +- src/dataframe.rs | 8 +- src/expr.rs | 2 - src/expr/cross_join.rs | 94 --------- src/expr/limit.rs | 22 +- src/functions.rs | 18 +- src/sql/logical.rs | 3 +- src/udf.rs | 4 +- src/udwf.rs | 29 ++- 19 files changed, 338 insertions(+), 338 deletions(-) delete mode 100644 src/expr/cross_join.rs diff --git a/Cargo.lock b/Cargo.lock index 0835f219e..497c5b850 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -84,9 +84,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.89" +version = "1.0.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6" +checksum = "c042108f3ed77fd83760a5fd79b53be043192bb3b9dba91d8c574c0ada7850c8" [[package]] name = "apache-avro" @@ -130,9 +130,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9ba0d7248932f4e2a12fb37f0a2e3ec82b3bdedbac2a1dce186e036843b8f8c" +checksum = "4caf25cdc4a985f91df42ed9e9308e1adbcd341a31a72605c697033fcef163e3" dependencies = [ "arrow-arith", "arrow-array", @@ -152,9 +152,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d60afcdc004841a5c8d8da4f4fa22d64eb19c0c01ef4bcedd77f175a7cf6e38f" +checksum = "91f2dfd1a7ec0aca967dfaa616096aec49779adc8eccec005e2f5e4111b1192a" dependencies = [ "arrow-array", "arrow-buffer", @@ -167,9 +167,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f16835e8599dbbb1659fd869d865254c4cf32c6c2bb60b6942ac9fc36bfa5da" +checksum = "d39387ca628be747394890a6e47f138ceac1aa912eab64f02519fed24b637af8" dependencies = [ "ahash", "arrow-buffer", @@ -184,9 +184,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a1f34f0faae77da6b142db61deba2cb6d60167592b178be317b341440acba80" +checksum = "9e51e05228852ffe3eb391ce7178a0f97d2cf80cc6ef91d3c4a6b3cb688049ec" dependencies = [ "bytes", "half", @@ -195,9 +195,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "450e4abb5775bca0740bec0bcf1b1a5ae07eff43bd625661c4436d8e8e4540c4" +checksum = "d09aea56ec9fa267f3f3f6cdab67d8a9974cbba90b3aa38c8fe9d0bb071bd8c1" dependencies = [ "arrow-array", "arrow-buffer", @@ -216,9 +216,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3a4e4d63830a341713e35d9a42452fbc6241d5f42fa5cf6a4681b8ad91370c4" +checksum = "c07b5232be87d115fde73e32f2ca7f1b353bff1b44ac422d3c6fc6ae38f11f0d" dependencies = [ "arrow-array", "arrow-buffer", @@ -235,9 +235,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b1e618bbf714c7a9e8d97203c806734f012ff71ae3adc8ad1b075689f540634" +checksum = "b98ae0af50890b494cebd7d6b04b35e896205c1d1df7b29a6272c5d0d0249ef5" dependencies = [ "arrow-buffer", "arrow-schema", @@ -247,9 +247,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f98e983549259a2b97049af7edfb8f28b8911682040e99a94e4ceb1196bd65c2" +checksum = "0ed91bdeaff5a1c00d28d8f73466bcb64d32bbd7093b5a30156b4b9f4dba3eee" dependencies = [ "arrow-array", "arrow-buffer", @@ -262,9 +262,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b198b9c6fcf086501730efbbcb483317b39330a116125af7bb06467d04b352a3" +checksum = "0471f51260a5309307e5d409c9dc70aede1cd9cf1d4ff0f0a1e8e1a2dd0e0d3c" dependencies = [ "arrow-array", "arrow-buffer", @@ -282,9 +282,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2427f37b4459a4b9e533045abe87a5183a5e0995a3fc2c2fd45027ae2cc4ef3f" +checksum = "2883d7035e0b600fb4c30ce1e50e66e53d8656aa729f2bfa4b51d359cf3ded52" dependencies = [ "arrow-array", "arrow-buffer", @@ -297,9 +297,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15959657d92e2261a7a323517640af87f5afd9fd8a6492e424ebee2203c567f6" +checksum = "552907e8e587a6fde4f8843fd7a27a576a260f65dab6c065741ea79f633fc5be" dependencies = [ "ahash", "arrow-array", @@ -311,18 +311,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbf0388a18fd7f7f3fe3de01852d30f54ed5182f9004db700fbe3ba843ed2794" +checksum = "539ada65246b949bd99ffa0881a9a15a4a529448af1a07a9838dd78617dafab1" dependencies = [ "bitflags 2.6.0", ] [[package]] name = "arrow-select" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b83e5723d307a38bf00ecd2972cd078d1339c7fd3eb044f609958a9a24463f3a" +checksum = "6259e566b752da6dceab91766ed8b2e67bf6270eb9ad8a6e07a33c1bede2b125" dependencies = [ "ahash", "arrow-array", @@ -334,9 +334,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ab3db7c09dd826e74079661d84ed01ed06547cf75d52c2818ef776d0d852305" +checksum = "f3179ccbd18ebf04277a095ba7321b93fd1f774f18816bd5f6b3ce2f594edb6c" dependencies = [ "arrow-array", "arrow-buffer", @@ -351,9 +351,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.13" +version = "0.4.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e614738943d3f68c628ae3dbce7c3daffb196665f82f8c8ea6b65de73c79429" +checksum = "0cb8f1d480b0ea3783ab015936d2a55c87e219676f0c0b7dec61494043f21857" dependencies = [ "bzip2", "flate2", @@ -482,9 +482,9 @@ dependencies = [ [[package]] name = "brotli" -version = "6.0.0" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74f7971dbd9326d58187408ab83117d8ac1bb9c17b085fdacd1cf2f598719b6b" +checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -515,9 +515,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.7.2" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "428d9aa8fbc0670b7b8d6030a7fadd0f86151cae55e4dbbece15f3780a3dfaf3" +checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da" [[package]] name = "bzip2" @@ -542,9 +542,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.28" +version = "1.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e80e3b6a3ab07840e1cae9b0666a63970dc28e8ed5ffbcdacbfc760c281bfc1" +checksum = "c2e7962b54006dcfcc61cb72735f4d89bb97061dd6a7ed882ec6b8ee53714c6f" dependencies = [ "jobserver", "libc", @@ -557,6 +557,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "chrono" version = "0.4.38" @@ -725,9 +731,9 @@ dependencies = [ [[package]] name = "dary_heap" -version = "0.3.6" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7762d17f1241643615821a8455a0b2c3e803784b058693d990b11f2dce25a0ca" +checksum = "04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728" [[package]] name = "dashmap" @@ -745,9 +751,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee907b081e45e1d14e1f327e89ef134f91fcebad0bfc2dc229fa9f6044379682" +checksum = "cbba0799cf6913b456ed07a94f0f3b6e12c62a5d88b10809e2284a0f2b915c05" dependencies = [ "ahash", "apache-avro", @@ -804,9 +810,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c2b914f6e33c429af7d8696c72a47ed9225d7e2b82c747ebdfa2408ed53579f" +checksum = "7493c5c2d40eec435b13d92e5703554f4efc7059451fcb8d3a79580ff0e45560" dependencies = [ "arrow-schema", "async-trait", @@ -819,9 +825,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a84f8e76330c582a6b8ada0b2c599ca46cfe46b7585e458fc3f4092bc722a18" +checksum = "24953049ebbd6f8964f91f60aa3514e121b5e81e068e33b60e77815ab369b25c" dependencies = [ "ahash", "apache-avro", @@ -832,6 +838,7 @@ dependencies = [ "chrono", "half", "hashbrown 0.14.5", + "indexmap", "instant", "libc", "num_cpus", @@ -845,9 +852,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf08cc30d92720d557df13bd5a5696213bd5ea0f38a866d8d85055d866fba774" +checksum = "f06df4ef76872e11c924d3c814fd2a8dd09905ed2e2195f71c857d78abd19685" dependencies = [ "log", "tokio", @@ -855,9 +862,9 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86bc4183d5c45b9f068a6f351678a0d1eb1225181424542bb75db18ec280b822" +checksum = "6bbdcb628d690f3ce5fea7de81642b514486d58ff9779a51f180a69a4eadb361" dependencies = [ "arrow", "chrono", @@ -876,9 +883,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "202119ce58e4d103e37ae64aab40d4e574c97bdd2bea994bf307b175fcbfa74d" +checksum = "8036495980e3131f706b7d33ab00b4492d73dc714e3cb74d11b50f9602a73246" dependencies = [ "ahash", "arrow", @@ -888,7 +895,9 @@ dependencies = [ "datafusion-common", "datafusion-expr-common", "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", "datafusion-physical-expr-common", + "indexmap", "paste", "serde_json", "sqlparser", @@ -898,20 +907,21 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8b181ce8569216abb01ef3294aa16c0a40d7d39350c2ff01ede00f167a535f2" +checksum = "4da0f3cb4669f9523b403d6b5a0ec85023e0ab3bf0183afd1517475b3e64fdd2" dependencies = [ "arrow", "datafusion-common", + "itertools", "paste", ] [[package]] name = "datafusion-functions" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e4124b8066444e05a24472f852e94cf56546c0f4d92d00f018f207216902712" +checksum = "f52c4012648b34853e40a2c6bcaa8772f837831019b68aca384fb38436dba162" dependencies = [ "arrow", "arrow-buffer", @@ -936,9 +946,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b94acdac235ea21810150a89751617ef2db7e32eba27f54be48a81bde2bfe119" +checksum = "e5b8bb624597ba28ed7446df4a9bd7c7a7bde7c578b6b527da3f47371d5f6741" dependencies = [ "ahash", "arrow", @@ -950,16 +960,16 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "half", + "indexmap", "log", "paste", - "sqlparser", ] [[package]] name = "datafusion-functions-aggregate-common" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c9ea085bbf900bf16e2ca0f56fc56236b2e4f2e1a2cccb67bcd83c5ab4ad0ef" +checksum = "6fb06208fc470bc8cf1ce2d9a1159d42db591f2c7264a8c1776b53ad8f675143" dependencies = [ "ahash", "arrow", @@ -971,9 +981,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c882e61665ed60c5ce9b061c1e587aeb8ae5ae4bcb5e5f2465139ab25328e0f" +checksum = "fca25bbb87323716d05e54114666e942172ccca23c5a507e9c7851db6e965317" dependencies = [ "arrow", "arrow-array", @@ -994,21 +1004,34 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98a354ce96df3ca6d025093adac9fd55ca09931c9b6f2630140721a95873fde4" +checksum = "5ae23356c634e54c59f7c51acb7a5b9f6240ffb2cf997049a1a24a8a88598dbe" dependencies = [ "datafusion-common", "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-physical-expr", "datafusion-physical-expr-common", "log", + "paste", +] + +[[package]] +name = "datafusion-functions-window-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4b3d6ff7794acea026de36007077a06b18b89e4f9c3fea7f2215f9f7dd9059b" +dependencies = [ + "datafusion-common", + "datafusion-physical-expr-common", ] [[package]] name = "datafusion-optimizer" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf677c74fb7b5a1899ef52709e4a70fff3ed80bdfb4bbe495909810e83d5f39" +checksum = "bec6241eb80c595fa0e1a8a6b69686b5cf3bd5fdacb8319582a0943b0bd788aa" dependencies = [ "arrow", "async-trait", @@ -1026,9 +1049,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30b077999f6eb6c43d6b25bc66332a3be2f693c382840f008dd763b8540f9530" +checksum = "3370357b8fc75ec38577700644e5d1b0bc78f38babab99c0b8bd26bafb3e4335" dependencies = [ "ahash", "arrow", @@ -1037,30 +1060,26 @@ dependencies = [ "arrow-ord", "arrow-schema", "arrow-string", - "base64 0.22.1", "chrono", "datafusion-common", - "datafusion-execution", "datafusion-expr", "datafusion-expr-common", "datafusion-functions-aggregate-common", "datafusion-physical-expr-common", "half", "hashbrown 0.14.5", - "hex", "indexmap", "itertools", "log", "paste", "petgraph", - "regex", ] [[package]] name = "datafusion-physical-expr-common" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dce847f885c2b13bbe29f5c8b7948797131aa470af6e16d2a94f4428b4f4f1bd" +checksum = "b8b7734d94bf2fa6f6e570935b0ddddd8421179ce200065be97874e13d46a47b" dependencies = [ "ahash", "arrow", @@ -1072,13 +1091,15 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d13238e3b9fdd62a4c18760bfef714bb990d1e1d3430e9f416aae4b3cfaa71af" +checksum = "7eee8c479522df21d7b395640dff88c5ed05361852dce6544d7c98e9dbcebffe" dependencies = [ + "arrow", "arrow-schema", "datafusion-common", "datafusion-execution", + "datafusion-expr-common", "datafusion-physical-expr", "datafusion-physical-plan", "itertools", @@ -1086,9 +1107,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "faba6f55a7eaf0241d07d12c2640de52742646b10f754485d5192bdfe2c9ceae" +checksum = "17e1fc2e2c239d14e8556f2622b19a726bf6bc6962cc00c71fc52626274bee24" dependencies = [ "ahash", "arrow", @@ -1102,8 +1123,8 @@ dependencies = [ "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", - "datafusion-functions-aggregate", "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", "datafusion-physical-expr", "datafusion-physical-expr-common", "futures", @@ -1121,9 +1142,9 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "585357d621fa03ea85a7fefca79ebc5ef0ee13a7f82be0762a414879a4d190a7" +checksum = "f730f7fc5a20134d4e5ecdf7bbf392002ac58163d58423ea28a702dc077b06e1" dependencies = [ "arrow", "chrono", @@ -1137,9 +1158,9 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4db6534382f92f528bdb5d925b4214c31ffd84fa7fe1eff3ed0d2f1286851ab8" +checksum = "12c225fe49e4f943e35446b263613ada7a9e9f8d647544e6b07037b9803567df" dependencies = [ "arrow", "chrono", @@ -1155,6 +1176,7 @@ dependencies = [ "arrow", "async-trait", "datafusion", + "datafusion-functions-window-common", "datafusion-proto", "datafusion-substrait", "futures", @@ -1171,15 +1193,16 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dad8d96a9b52e1aa24f9373696a815be828193efce7cb0bbd2140b6bb67d1819" +checksum = "63e3a4ed41dbee20a5d947a59ca035c225d67dc9cbe869c10f66dcdf25e7ce51" dependencies = [ "arrow", "arrow-array", "arrow-schema", "datafusion-common", "datafusion-expr", + "indexmap", "log", "regex", "sqlparser", @@ -1188,9 +1211,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f92b1b80e98bf5a9921bf118816e0e766d18527e343153321fcccfe4d68c5c45" +checksum = "8b9c768d2b4c4485c43afbaeeb86dd1f2ac3fb34a9e6e8c8b06180d2a223d5ba" dependencies = [ "arrow-buffer", "async-recursion", @@ -1530,9 +1553,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "1.4.1" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50dfd22e0e76d0f662d429a5f80fcaf3855009297eab6a0a9f8543834744ba05" +checksum = "bbbff0a806a4728c99295b254c8838933b5b082d75e3cb70c8dab21fdfbcfa9a" dependencies = [ "bytes", "futures-channel", @@ -1568,9 +1591,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.9" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41296eb09f183ac68eec06e03cdbea2e759633d4067b2f6552fc2e009bcad08b" +checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" dependencies = [ "bytes", "futures-channel", @@ -1684,9 +1707,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.70" +version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1868808506b929d7b0cfa8f75951347aa71bb21144b7791bae35d9bccfcfe37a" +checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" dependencies = [ "wasm-bindgen", ] @@ -1763,9 +1786,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.159" +version = "0.2.161" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5" +checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" [[package]] name = "libflate" @@ -1793,9 +1816,9 @@ dependencies = [ [[package]] name = "libm" -version = "0.2.8" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" +checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" [[package]] name = "libmimalloc-sys" @@ -2011,9 +2034,9 @@ dependencies = [ [[package]] name = "object_store" -version = "0.11.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25a0c4b3a0e31f8b66f71ad8064521efa773910196e2cde791436f13409f3b45" +checksum = "6eb4c22c6154a1e759d7099f9ffad7cc5ef8245f9efbab4a41b92623079c82f3" dependencies = [ "async-trait", "base64 0.22.1", @@ -2086,9 +2109,9 @@ dependencies = [ [[package]] name = "parquet" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "310c46a70a3ba90d98fec39fa2da6d9d731e544191da6fb56c9d199484d0dd3e" +checksum = "dea02606ba6f5e856561d8d507dba8bac060aefca2a6c0f1aa1d361fed91ff3e" dependencies = [ "ahash", "arrow-array", @@ -2228,9 +2251,9 @@ dependencies = [ [[package]] name = "pin-project-lite" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" +checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" [[package]] name = "pin-utils" @@ -2261,9 +2284,9 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.22" +version = "0.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "479cf940fbbb3426c32c5d5176f62ad57549a0bb84773423ba8be9d089f5faba" +checksum = "64d1ec885c64d0457d564db4ec299b2dae3f9c02808b8ad9c3a089c591b18033" dependencies = [ "proc-macro2", "syn", @@ -2271,9 +2294,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.86" +version = "1.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" dependencies = [ "unicode-ident", ] @@ -2342,9 +2365,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.22.4" +version = "0.22.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00e89ce2565d6044ca31a3eb79a334c3a79a841120a98f64eea9f579564cb691" +checksum = "3d922163ba1f79c04bc49073ba7b32fd5a8d3b76a87c955921234b8e77333c51" dependencies = [ "cfg-if", "indoc", @@ -2360,9 +2383,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.22.4" +version = "0.22.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8afbaf3abd7325e08f35ffb8deb5892046fcb2608b703db6a583a5ba4cea01e" +checksum = "bc38c5feeb496c8321091edf3d63e9a6829eab4b863b4a6a65f26f3e9cc6b179" dependencies = [ "once_cell", "target-lexicon", @@ -2370,9 +2393,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.22.4" +version = "0.22.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec15a5ba277339d04763f4c23d85987a5b08cbb494860be141e6a10a8eb88022" +checksum = "94845622d88ae274d2729fcefc850e63d7a3ddff5e3ce11bd88486db9f1d357d" dependencies = [ "libc", "pyo3-build-config", @@ -2380,9 +2403,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.22.4" +version = "0.22.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15e0f01b5364bcfbb686a52fc4181d412b708a68ed20c330db9fc8d2c2bf5a43" +checksum = "e655aad15e09b94ffdb3ce3d217acf652e26bbc37697ef012f5e5e348c716e5e" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -2392,9 +2415,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.22.4" +version = "0.22.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a09b550200e1e5ed9176976d0060cbc2ea82dc8515da07885e7b8153a85caacb" +checksum = "ae1e3f09eecd94618f60a455a23def79f79eba4dc561a97324bf9ac8c6df30ce" dependencies = [ "heck 0.5.0", "proc-macro2", @@ -2456,10 +2479,11 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.5" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fe68c2e9e1a1234e218683dbdf9f9dfcb094113c5ac2b938dfcb9bab4c4140b" +checksum = "e346e016eacfff12233c243718197ca12f148c84e1e84268a896699b41c71780" dependencies = [ + "cfg_aliases", "libc", "once_cell", "socket2", @@ -2517,9 +2541,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", @@ -2552,9 +2576,9 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "regress" -version = "0.9.1" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eae2a1ebfecc58aff952ef8ccd364329abe627762f5bf09ff42eb9d98522479" +checksum = "1541daf4e4ed43a0922b7969bdc2170178bcacc5dabf7e39bc508a9fa3953a7a" dependencies = [ "hashbrown 0.14.5", "memchr", @@ -2562,9 +2586,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.8" +version = "0.12.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f713147fbe92361e52392c73b8c9e48c04c6625bce969ef54dc901e58e042a7b" +checksum = "a77c62af46e79de0a562e1a9849205ffcb7fc1238876e9bd743357570e04046f" dependencies = [ "base64 0.22.1", "bytes", @@ -2649,9 +2673,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.37" +version = "0.38.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" +checksum = "aa260229e6538e52293eeb577aabd09945a09d6d9cc0fc550ed7529056c2e32a" dependencies = [ "bitflags 2.6.0", "errno", @@ -2662,9 +2686,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.14" +version = "0.23.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "415d9944693cb90382053259f89fbb077ea730ad7273047ec63b19bc9b160ba8" +checksum = "eee87ff5d9b36712a58574e12e9f0ea80f915a5b0ac518d322b24a465617925e" dependencies = [ "once_cell", "ring", @@ -2698,9 +2722,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e696e35370c65c9c541198af4543ccd580cf17fc25d8e05c5a242b202488c55" +checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" [[package]] name = "rustls-webpki" @@ -2715,9 +2739,9 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" +checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" [[package]] name = "ryu" @@ -2813,18 +2837,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.210" +version = "1.0.214" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" +checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.210" +version = "1.0.214" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" +checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" dependencies = [ "proc-macro2", "quote", @@ -2844,9 +2868,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.128" +version = "1.0.132" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" +checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" dependencies = [ "itoa", "memchr", @@ -2974,9 +2998,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.50.0" +version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2e5b515a2bd5168426033e9efbfd05500114833916f1d5c268f938b4ee130ac" +checksum = "5fe11944a61da0da3f592e19a45ebe5ab92dc14a779907ff1f08fbb797bfefc7" dependencies = [ "log", "sqlparser_derive", @@ -3042,9 +3066,9 @@ dependencies = [ [[package]] name = "substrait" -version = "0.41.9" +version = "0.45.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a3bf05f1d7a3fd7a97790d410f6e859b3a98dcde05e7a3fc00b31b0f60fe7cb" +checksum = "a127ae9d8e443cea5c2122eb2ffe5fe489e802a1e746a09c5a5cb59d074c0aeb" dependencies = [ "heck 0.5.0", "pbjson", @@ -3055,6 +3079,7 @@ dependencies = [ "prost-build", "prost-types", "protobuf-src", + "regress", "schemars", "semver", "serde", @@ -3073,9 +3098,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.79" +version = "2.0.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590" +checksum = "5023162dfcd14ef8f32034d8bcd4cc5ddc61ef7a247c024a33e24e1f24d21b56" dependencies = [ "proc-macro2", "quote", @@ -3112,18 +3137,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.64" +version = "1.0.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" +checksum = "5d11abd9594d9b38965ef50805c5e469ca9cc6f197f883f717e0269a3057b3d5" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.64" +version = "1.0.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" +checksum = "ae71770322cbd277e69d762a16c444af02aa0575ac0d174f0b9562d3b37f8602" dependencies = [ "proc-macro2", "quote", @@ -3167,9 +3192,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.40.0" +version = "1.41.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2b070231665d27ad9ec9b8df639893f46727666c6767db40317fbe920a5d998" +checksum = "145f3413504347a2be84393cc8a7d2fb4d863b375909ea59f2158261aa258bbb" dependencies = [ "backtrace", "bytes", @@ -3297,9 +3322,9 @@ checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "typify" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb6beec125971dda80a086f90b4a70f60f222990ce4d63ad0fc140492f53444" +checksum = "b4c644dda9862f0fef3a570d8ddb3c2cfb1d5ac824a1f2ddfa7bc8f071a5ad8a" dependencies = [ "typify-impl", "typify-macro", @@ -3307,9 +3332,9 @@ dependencies = [ [[package]] name = "typify-impl" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93bbb24e990654aff858d80fee8114f4322f7d7a1b1ecb45129e2fcb0d0ad5ae" +checksum = "d59ab345b6c0d8ae9500b9ff334a4c7c0d316c1c628dc55726b95887eb8dbd11" dependencies = [ "heck 0.5.0", "log", @@ -3327,9 +3352,9 @@ dependencies = [ [[package]] name = "typify-macro" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8e6491896e955692d68361c68db2b263e3bec317ec0b684e0e2fa882fb6e31e" +checksum = "785e2cdcef0df8160fdd762ed548a637aaec1e83704fdbc14da0df66013ee8d0" dependencies = [ "proc-macro2", "quote", @@ -3447,9 +3472,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a82edfc16a6c469f5f44dc7b571814045d60404b55a0ee849f9bcfa2e63dd9b5" +checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" dependencies = [ "cfg-if", "once_cell", @@ -3458,9 +3483,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9de396da306523044d3302746f1208fa71d7532227f15e347e2d93e4145dd77b" +checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" dependencies = [ "bumpalo", "log", @@ -3473,9 +3498,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.43" +version = "0.4.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61e9300f63a621e96ed275155c108eb6f843b6a26d053f122ab69724559dc8ed" +checksum = "cc7ec4f8827a71586374db3e87abdb5a2bb3a15afed140221307c3ec06b1f63b" dependencies = [ "cfg-if", "js-sys", @@ -3485,9 +3510,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "585c4c91a46b072c92e908d99cb1dcdf95c5218eeb6f3bf1efa991ee7a68cccf" +checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3495,9 +3520,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" +checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" dependencies = [ "proc-macro2", "quote", @@ -3508,15 +3533,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" +checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" [[package]] name = "wasm-streams" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e072d4e72f700fb3443d8fe94a39315df013eef1104903cdb0a2abd322bbecd" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" dependencies = [ "futures-util", "js-sys", @@ -3527,9 +3552,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.70" +version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26fdeaafd9bd129f65e7c031593c24d62186301e0c72c8978fa1678be7d532c0" +checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" dependencies = [ "js-sys", "wasm-bindgen", diff --git a/Cargo.toml b/Cargo.toml index 073f82cf0..11ce08c75 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,9 +37,10 @@ substrait = ["dep:datafusion-substrait"] tokio = { version = "1.39", features = ["macros", "rt", "rt-multi-thread", "sync"] } pyo3 = { version = "0.22", features = ["extension-module", "abi3", "abi3-py38"] } arrow = { version = "53", features = ["pyarrow"] } -datafusion = { version = "42.0.0", features = ["pyarrow", "avro", "unicode_expressions"] } -datafusion-substrait = { version = "42.0.0", optional = true } -datafusion-proto = { version = "42.0.0" } +datafusion = { version = "43.0.0", features = ["pyarrow", "avro", "unicode_expressions"] } +datafusion-substrait = { version = "43.0.0", optional = true } +datafusion-proto = { version = "43.0.0" } +datafusion-functions-window-common = { version = "43.0.0" } prost = "0.13" # keep in line with `datafusion-substrait` uuid = { version = "1.11", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false, features = ["local_dynamic_tls"] } @@ -58,4 +59,4 @@ crate-type = ["cdylib", "rlib"] [profile.release] lto = true -codegen-units = 1 +codegen-units = 1 \ No newline at end of file diff --git a/examples/tpch/_tests.py b/examples/tpch/_tests.py index 13144ae9d..3ce9cdfe5 100644 --- a/examples/tpch/_tests.py +++ b/examples/tpch/_tests.py @@ -25,7 +25,7 @@ def df_selection(col_name, col_type): if col_type == pa.float64() or isinstance(col_type, pa.Decimal128Type): return F.round(col(col_name), lit(2)).alias(col_name) - elif col_type == pa.string(): + elif col_type == pa.string() or col_type == pa.string_view(): return F.trim(col(col_name)).alias(col_name) else: return col(col_name) @@ -43,7 +43,7 @@ def load_schema(col_name, col_type): def expected_selection(col_name, col_type): if col_type == pa.int64() or col_type == pa.int32(): return F.trim(col(col_name)).cast(col_type).alias(col_name) - elif col_type == pa.string(): + elif col_type == pa.string() or col_type == pa.string_view(): return F.trim(col(col_name)).alias(col_name) else: return col(col_name) diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index c4e7713f3..b10724381 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -51,7 +51,6 @@ Column = expr_internal.Column CreateMemoryTable = expr_internal.CreateMemoryTable CreateView = expr_internal.CreateView -CrossJoin = expr_internal.CrossJoin Distinct = expr_internal.Distinct DropTable = expr_internal.DropTable EmptyRelation = expr_internal.EmptyRelation @@ -140,7 +139,6 @@ "Join", "JoinType", "JoinConstraint", - "CrossJoin", "Union", "Unnest", "UnnestExpr", @@ -376,6 +374,8 @@ def literal(value: Any) -> Expr: ``value`` must be a valid PyArrow scalar value or easily castable to one. """ + if isinstance(value, str): + value = pa.scalar(value, type=pa.string_view()) if not isinstance(value, pa.Scalar): value = pa.scalar(value) return Expr(expr_internal.Expr.literal(value)) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 907f801af..5a2eab56d 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -297,7 +297,7 @@ def decode(input: Expr, encoding: Expr) -> Expr: def array_to_string(expr: Expr, delimiter: Expr) -> Expr: """Converts each element to its text representation.""" - return Expr(f.array_to_string(expr.expr, delimiter.expr)) + return Expr(f.array_to_string(expr.expr, delimiter.expr.cast(pa.string()))) def array_join(expr: Expr, delimiter: Expr) -> Expr: @@ -1067,7 +1067,10 @@ def struct(*args: Expr) -> Expr: def named_struct(name_pairs: list[tuple[str, Expr]]) -> Expr: """Returns a struct with the given names and arguments pairs.""" - name_pair_exprs = [[Expr.literal(pair[0]), pair[1]] for pair in name_pairs] + name_pair_exprs = [ + [Expr.literal(pa.scalar(pair[0], type=pa.string())), pair[1]] + for pair in name_pairs + ] # flatten name_pairs = [x.expr for xs in name_pair_exprs for x in xs] @@ -1424,7 +1427,9 @@ def array_sort(array: Expr, descending: bool = False, null_first: bool = False) nulls_first = "NULLS FIRST" if null_first else "NULLS LAST" return Expr( f.array_sort( - array.expr, Expr.literal(desc).expr, Expr.literal(nulls_first).expr + array.expr, + Expr.literal(pa.scalar(desc, type=pa.string())).expr, + Expr.literal(pa.scalar(nulls_first, type=pa.string())).expr, ) ) diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py index 291ef2bae..d9d994b22 100644 --- a/python/datafusion/udf.py +++ b/python/datafusion/udf.py @@ -229,6 +229,7 @@ def udaf( which this UDAF is used. The following examples are all valid. .. code-block:: python + import pyarrow as pa import pyarrow.compute as pc diff --git a/python/tests/test_expr.py b/python/tests/test_expr.py index 1847edef2..77f88aa44 100644 --- a/python/tests/test_expr.py +++ b/python/tests/test_expr.py @@ -85,14 +85,18 @@ def test_limit(test_ctx): plan = plan.to_variant() assert isinstance(plan, Limit) - assert plan.skip() == 0 + # TODO: Upstream now has expressions for skip and fetch + # REF: https://github.com/apache/datafusion/pull/12836 + # assert plan.skip() == 0 df = test_ctx.sql("select c1 from test LIMIT 10 OFFSET 5") plan = df.logical_plan() plan = plan.to_variant() assert isinstance(plan, Limit) - assert plan.skip() == 5 + # TODO: Upstream now has expressions for skip and fetch + # REF: https://github.com/apache/datafusion/pull/12836 + # assert plan.skip() == 5 def test_aggregate_query(test_ctx): @@ -126,7 +130,10 @@ def test_relational_expr(test_ctx): ctx = SessionContext() batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array(["alpha", "beta", "gamma"])], + [ + pa.array([1, 2, 3]), + pa.array(["alpha", "beta", "gamma"], type=pa.string_view()), + ], names=["a", "b"], ) df = ctx.create_dataframe([[batch]], name="batch_array") @@ -141,7 +148,8 @@ def test_relational_expr(test_ctx): assert df.filter(col("b") == "beta").count() == 1 assert df.filter(col("b") != "beta").count() == 2 - assert df.filter(col("a") == "beta").count() == 0 + with pytest.raises(Exception): + df.filter(col("a") == "beta").count() def test_expr_to_variant(): diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index c65c633a4..b3a5a0652 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -34,9 +34,9 @@ def df(): # create a RecordBatch and a new DataFrame from it batch = pa.RecordBatch.from_arrays( [ - pa.array(["Hello", "World", "!"]), + pa.array(["Hello", "World", "!"], type=pa.string_view()), pa.array([4, 5, 6]), - pa.array(["hello ", " world ", " !"]), + pa.array(["hello ", " world ", " !"], type=pa.string_view()), pa.array( [ datetime(2022, 12, 31), @@ -88,8 +88,8 @@ def test_literal(df): assert len(result) == 1 result = result[0] assert result.column(0) == pa.array([1] * 3) - assert result.column(1) == pa.array(["1"] * 3) - assert result.column(2) == pa.array(["OK"] * 3) + assert result.column(1) == pa.array(["1"] * 3, type=pa.string_view()) + assert result.column(2) == pa.array(["OK"] * 3, type=pa.string_view()) assert result.column(3) == pa.array([3.14] * 3) assert result.column(4) == pa.array([True] * 3) assert result.column(5) == pa.array([b"hello world"] * 3) @@ -97,7 +97,9 @@ def test_literal(df): def test_lit_arith(df): """Test literals with arithmetic operations""" - df = df.select(literal(1) + column("b"), f.concat(column("a"), literal("!"))) + df = df.select( + literal(1) + column("b"), f.concat(column("a").cast(pa.string()), literal("!")) + ) result = df.collect() assert len(result) == 1 result = result[0] @@ -600,21 +602,33 @@ def test_array_function_obj_tests(stmt, py_expr): f.ascii(column("a")), pa.array([72, 87, 33], type=pa.int32()), ), # H = 72; W = 87; ! = 33 - (f.bit_length(column("a")), pa.array([40, 40, 8], type=pa.int32())), - (f.btrim(literal(" World ")), pa.array(["World", "World", "World"])), + ( + f.bit_length(column("a").cast(pa.string())), + pa.array([40, 40, 8], type=pa.int32()), + ), + ( + f.btrim(literal(" World ")), + pa.array(["World", "World", "World"], type=pa.string_view()), + ), (f.character_length(column("a")), pa.array([5, 5, 1], type=pa.int32())), (f.chr(literal(68)), pa.array(["D", "D", "D"])), ( f.concat_ws("-", column("a"), literal("test")), pa.array(["Hello-test", "World-test", "!-test"]), ), - (f.concat(column("a"), literal("?")), pa.array(["Hello?", "World?", "!?"])), + ( + f.concat(column("a").cast(pa.string()), literal("?")), + pa.array(["Hello?", "World?", "!?"]), + ), (f.initcap(column("c")), pa.array(["Hello ", " World ", " !"])), (f.left(column("a"), literal(3)), pa.array(["Hel", "Wor", "!"])), (f.length(column("c")), pa.array([6, 7, 2], type=pa.int32())), (f.lower(column("a")), pa.array(["hello", "world", "!"])), (f.lpad(column("a"), literal(7)), pa.array([" Hello", " World", " !"])), - (f.ltrim(column("c")), pa.array(["hello ", "world ", "!"])), + ( + f.ltrim(column("c")), + pa.array(["hello ", "world ", "!"], type=pa.string_view()), + ), ( f.md5(column("a")), pa.array( @@ -640,19 +654,25 @@ def test_array_function_obj_tests(stmt, py_expr): f.rpad(column("a"), literal(8)), pa.array(["Hello ", "World ", "! "]), ), - (f.rtrim(column("c")), pa.array(["hello", " world", " !"])), + ( + f.rtrim(column("c")), + pa.array(["hello", " world", " !"], type=pa.string_view()), + ), ( f.split_part(column("a"), literal("l"), literal(1)), pa.array(["He", "Wor", "!"]), ), (f.starts_with(column("a"), literal("Wor")), pa.array([False, True, False])), (f.strpos(column("a"), literal("o")), pa.array([5, 2, 0], type=pa.int32())), - (f.substr(column("a"), literal(3)), pa.array(["llo", "rld", ""])), + ( + f.substr(column("a"), literal(3)), + pa.array(["llo", "rld", ""], type=pa.string_view()), + ), ( f.translate(column("a"), literal("or"), literal("ld")), pa.array(["Helll", "Wldld", "!"]), ), - (f.trim(column("c")), pa.array(["hello", "world", "!"])), + (f.trim(column("c")), pa.array(["hello", "world", "!"], type=pa.string_view())), (f.upper(column("c")), pa.array(["HELLO ", " WORLD ", " !"])), (f.ends_with(column("a"), literal("llo")), pa.array([True, False, False])), ( @@ -794,9 +814,9 @@ def test_temporal_functions(df): f.date_trunc(literal("month"), column("d")), f.datetrunc(literal("day"), column("d")), f.date_bin( - literal("15 minutes"), + literal("15 minutes").cast(pa.string()), column("d"), - literal("2001-01-01 00:02:30"), + literal("2001-01-01 00:02:30").cast(pa.string()), ), f.from_unixtime(literal(1673383974)), f.to_timestamp(literal("2023-09-07 05:06:14.523952")), @@ -858,8 +878,8 @@ def test_case(df): result = df.collect() result = result[0] assert result.column(0) == pa.array([10, 8, 8]) - assert result.column(1) == pa.array(["Hola", "Mundo", "!!"]) - assert result.column(2) == pa.array(["Hola", "Mundo", None]) + assert result.column(1) == pa.array(["Hola", "Mundo", "!!"], type=pa.string_view()) + assert result.column(2) == pa.array(["Hola", "Mundo", None], type=pa.string_view()) def test_when_with_no_base(df): @@ -877,8 +897,10 @@ def test_when_with_no_base(df): result = df.collect() result = result[0] assert result.column(0) == pa.array([4, 5, 6]) - assert result.column(1) == pa.array(["too small", "just right", "too big"]) - assert result.column(2) == pa.array(["Hello", None, None]) + assert result.column(1) == pa.array( + ["too small", "just right", "too big"], type=pa.string_view() + ) + assert result.column(2) == pa.array(["Hello", None, None], type=pa.string_view()) def test_regr_funcs_sql(df): @@ -1021,8 +1043,13 @@ def test_regr_funcs_df(func, expected): def test_binary_string_functions(df): df = df.select( - f.encode(column("a"), literal("base64")), - f.decode(f.encode(column("a"), literal("base64")), literal("base64")), + f.encode(column("a").cast(pa.string()), literal("base64").cast(pa.string())), + f.decode( + f.encode( + column("a").cast(pa.string()), literal("base64").cast(pa.string()) + ), + literal("base64").cast(pa.string()), + ), ) result = df.collect() assert len(result) == 1 diff --git a/python/tests/test_imports.py b/python/tests/test_imports.py index 3d324fb62..6ea77b15f 100644 --- a/python/tests/test_imports.py +++ b/python/tests/test_imports.py @@ -46,7 +46,6 @@ Join, JoinType, JoinConstraint, - CrossJoin, Union, Like, ILike, @@ -129,7 +128,6 @@ def test_class_module_is_datafusion(): Join, JoinType, JoinConstraint, - CrossJoin, Union, Like, ILike, diff --git a/python/tests/test_sql.py b/python/tests/test_sql.py index 39e5ffe6d..a2521dd09 100644 --- a/python/tests/test_sql.py +++ b/python/tests/test_sql.py @@ -468,6 +468,13 @@ def test_simple_select(ctx, tmp_path, arr): batches = ctx.sql("SELECT a AS tt FROM t").collect() result = batches[0].column(0) + # In DF 43.0.0 we now default to having BinaryView and StringView + # so the array that is saved to the parquet is slightly different + # than the array read. Convert to values for comparison. + if isinstance(result, pa.BinaryViewArray) or isinstance(result, pa.StringViewArray): + arr = arr.tolist() + result = result.tolist() + np.testing.assert_equal(result, arr) diff --git a/src/context.rs b/src/context.rs index f445874d6..c2a263fa7 100644 --- a/src/context.rs +++ b/src/context.rs @@ -287,7 +287,7 @@ impl PySessionContext { } else { RuntimeConfig::default() }; - let runtime = Arc::new(RuntimeEnv::new(runtime_config)?); + let runtime = Arc::new(RuntimeEnv::try_new(runtime_config)?); let session_state = SessionStateBuilder::new() .with_config(config) .with_runtime_env(runtime) diff --git a/src/dataframe.rs b/src/dataframe.rs index ee8fbbf9d..e7d6ca6d6 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -402,7 +402,9 @@ impl PyDataFrame { #[pyo3(signature = (column, preserve_nulls=true))] fn unnest_column(&self, column: &str, preserve_nulls: bool) -> PyResult { - let unnest_options = UnnestOptions { preserve_nulls }; + // TODO: expose RecursionUnnestOptions + // REF: https://github.com/apache/datafusion/pull/11577 + let unnest_options = UnnestOptions::default().with_preserve_nulls(preserve_nulls); let df = self .df .as_ref() @@ -413,7 +415,9 @@ impl PyDataFrame { #[pyo3(signature = (columns, preserve_nulls=true))] fn unnest_columns(&self, columns: Vec, preserve_nulls: bool) -> PyResult { - let unnest_options = UnnestOptions { preserve_nulls }; + // TODO: expose RecursionUnnestOptions + // REF: https://github.com/apache/datafusion/pull/11577 + let unnest_options = UnnestOptions::default().with_preserve_nulls(preserve_nulls); let cols = columns.iter().map(|s| s.as_ref()).collect::>(); let df = self .df diff --git a/src/expr.rs b/src/expr.rs index 49fa4b845..bca0cd3fa 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -65,7 +65,6 @@ pub mod column; pub mod conditional_expr; pub mod create_memory_table; pub mod create_view; -pub mod cross_join; pub mod distinct; pub mod drop_table; pub mod empty_relation; @@ -775,7 +774,6 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; - m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/src/expr/cross_join.rs b/src/expr/cross_join.rs deleted file mode 100644 index 5bc202aac..000000000 --- a/src/expr/cross_join.rs +++ /dev/null @@ -1,94 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use datafusion::logical_expr::logical_plan::CrossJoin; -use pyo3::prelude::*; -use std::fmt::{self, Display, Formatter}; - -use super::logical_node::LogicalNode; -use crate::common::df_schema::PyDFSchema; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass(name = "CrossJoin", module = "datafusion.expr", subclass)] -#[derive(Clone)] -pub struct PyCrossJoin { - cross_join: CrossJoin, -} - -impl From for PyCrossJoin { - fn from(cross_join: CrossJoin) -> PyCrossJoin { - PyCrossJoin { cross_join } - } -} - -impl From for CrossJoin { - fn from(cross_join: PyCrossJoin) -> Self { - cross_join.cross_join - } -} - -impl Display for PyCrossJoin { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "CrossJoin - \nLeft: {:?} - \nRight: {:?} - \nSchema: {:?}", - &self.cross_join.left, &self.cross_join.right, &self.cross_join.schema - ) - } -} - -#[pymethods] -impl PyCrossJoin { - /// Retrieves the left input `LogicalPlan` to this `CrossJoin` node - fn left(&self) -> PyResult { - Ok(self.cross_join.left.as_ref().clone().into()) - } - - /// Retrieves the right input `LogicalPlan` to this `CrossJoin` node - fn right(&self) -> PyResult { - Ok(self.cross_join.right.as_ref().clone().into()) - } - - /// Resulting Schema for this `CrossJoin` node instance - fn schema(&self) -> PyResult { - Ok(self.cross_join.schema.as_ref().clone().into()) - } - - fn __repr__(&self) -> PyResult { - Ok(format!("CrossJoin({})", self)) - } - - fn __name__(&self) -> PyResult { - Ok("CrossJoin".to_string()) - } -} - -impl LogicalNode for PyCrossJoin { - fn inputs(&self) -> Vec { - vec![ - PyLogicalPlan::from((*self.cross_join.left).clone()), - PyLogicalPlan::from((*self.cross_join.right).clone()), - ] - } - - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) - } -} diff --git a/src/expr/limit.rs b/src/expr/limit.rs index 876e154c1..84ad7d68b 100644 --- a/src/expr/limit.rs +++ b/src/expr/limit.rs @@ -46,7 +46,7 @@ impl Display for PyLimit { write!( f, "Limit - Skip: {} + Skip: {:?} Fetch: {:?} Input: {:?}", &self.limit.skip, &self.limit.fetch, &self.limit.input @@ -56,15 +56,19 @@ impl Display for PyLimit { #[pymethods] impl PyLimit { - /// Retrieves the skip value for this `Limit` - fn skip(&self) -> usize { - self.limit.skip - } + // NOTE: Upstream now has expressions for skip and fetch + // TODO: Do we still want to expose these? + // REF: https://github.com/apache/datafusion/pull/12836 - /// Retrieves the fetch value for this `Limit` - fn fetch(&self) -> Option { - self.limit.fetch - } + // /// Retrieves the skip value for this `Limit` + // fn skip(&self) -> usize { + // self.limit.skip + // } + + // /// Retrieves the fetch value for this `Limit` + // fn fetch(&self) -> Option { + // self.limit.fetch + // } /// Retrieves the input `LogicalPlan` to this `Limit` node fn input(&self) -> PyResult> { diff --git a/src/functions.rs b/src/functions.rs index fe3531ba9..e29c57f9b 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -16,7 +16,6 @@ // under the License. use datafusion::functions_aggregate::all_default_aggregate_functions; -use datafusion::logical_expr::window_function; use datafusion::logical_expr::ExprFunctionExt; use datafusion::logical_expr::WindowFrame; use pyo3::{prelude::*, wrap_pyfunction}; @@ -33,6 +32,7 @@ use datafusion::common::{Column, ScalarValue, TableReference}; use datafusion::execution::FunctionRegistry; use datafusion::functions; use datafusion::functions_aggregate; +use datafusion::functions_window; use datafusion::logical_expr::expr::Alias; use datafusion::logical_expr::sqlparser::ast::NullTreatment as DFNullTreatment; use datafusion::logical_expr::{ @@ -758,7 +758,7 @@ pub fn lead( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = window_function::lead(arg.expr, Some(shift_offset), default_value); + let window_fn = functions_window::expr_fn::lead(arg.expr, Some(shift_offset), default_value); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } @@ -772,7 +772,7 @@ pub fn lag( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = window_function::lag(arg.expr, Some(shift_offset), default_value); + let window_fn = functions_window::expr_fn::lag(arg.expr, Some(shift_offset), default_value); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } @@ -783,7 +783,7 @@ pub fn row_number( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = datafusion::functions_window::expr_fn::row_number(); + let window_fn = functions_window::expr_fn::row_number(); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } @@ -794,7 +794,7 @@ pub fn rank( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = window_function::rank(); + let window_fn = functions_window::expr_fn::rank(); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } @@ -805,7 +805,7 @@ pub fn dense_rank( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = window_function::dense_rank(); + let window_fn = functions_window::expr_fn::dense_rank(); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } @@ -816,7 +816,7 @@ pub fn percent_rank( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = window_function::percent_rank(); + let window_fn = functions_window::expr_fn::percent_rank(); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } @@ -827,7 +827,7 @@ pub fn cume_dist( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = window_function::cume_dist(); + let window_fn = functions_window::expr_fn::cume_dist(); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } @@ -839,7 +839,7 @@ pub fn ntile( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = window_function::ntile(arg.into()); + let window_fn = functions_window::expr_fn::ntile(arg.into()); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } diff --git a/src/sql/logical.rs b/src/sql/logical.rs index fc398ff89..40f0a6a65 100644 --- a/src/sql/logical.rs +++ b/src/sql/logical.rs @@ -19,7 +19,6 @@ use std::sync::Arc; use crate::expr::aggregate::PyAggregate; use crate::expr::analyze::PyAnalyze; -use crate::expr::cross_join::PyCrossJoin; use crate::expr::distinct::PyDistinct; use crate::expr::empty_relation::PyEmptyRelation; use crate::expr::explain::PyExplain; @@ -68,7 +67,6 @@ impl PyLogicalPlan { match self.plan.as_ref() { LogicalPlan::Aggregate(plan) => PyAggregate::from(plan.clone()).to_variant(py), LogicalPlan::Analyze(plan) => PyAnalyze::from(plan.clone()).to_variant(py), - LogicalPlan::CrossJoin(plan) => PyCrossJoin::from(plan.clone()).to_variant(py), LogicalPlan::Distinct(plan) => PyDistinct::from(plan.clone()).to_variant(py), LogicalPlan::EmptyRelation(plan) => PyEmptyRelation::from(plan.clone()).to_variant(py), LogicalPlan::Explain(plan) => PyExplain::from(plan.clone()).to_variant(py), @@ -92,6 +90,7 @@ impl PyLogicalPlan { | LogicalPlan::Ddl(_) | LogicalPlan::Copy(_) | LogicalPlan::DescribeTable(_) + | LogicalPlan::Execute(_) | LogicalPlan::RecursiveQuery(_) => Err(py_unsupported_variant_err(format!( "Conversion of variant not implemented: {:?}", self.plan diff --git a/src/udf.rs b/src/udf.rs index ec8efb169..4570e77a6 100644 --- a/src/udf.rs +++ b/src/udf.rs @@ -31,7 +31,7 @@ use datafusion::logical_expr::{create_udf, ColumnarValue}; use crate::expr::PyExpr; use crate::utils::parse_volatility; -/// Create a Rust callable function fr a python function that expects pyarrow arrays +/// Create a Rust callable function from a python function that expects pyarrow arrays fn pyarrow_function_to_rust( func: PyObject, ) -> impl Fn(&[ArrayRef]) -> Result { @@ -97,7 +97,7 @@ impl PyScalarUDF { let function = create_udf( name, input_types.0, - Arc::new(return_type.0), + return_type.0, parse_volatility(volatility)?, to_scalar_function_impl(func), ); diff --git a/src/udwf.rs b/src/udwf.rs index 43c21ec7b..3f5ad0b1d 100644 --- a/src/udwf.rs +++ b/src/udwf.rs @@ -20,11 +20,16 @@ use std::ops::Range; use std::sync::Arc; use arrow::array::{make_array, Array, ArrayData, ArrayRef}; +use datafusion::logical_expr::function::{PartitionEvaluatorArgs, WindowUDFFieldArgs}; use datafusion::logical_expr::window_state::WindowAggState; +use datafusion::physical_plan::PhysicalExpr; use datafusion::scalar::ScalarValue; +use datafusion_functions_window_common::expr::ExpressionArgs; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; +use crate::expr::PyExpr; +use crate::utils::parse_volatility; use datafusion::arrow::datatypes::DataType; use datafusion::arrow::pyarrow::{FromPyArrow, PyArrowType, ToPyArrow}; use datafusion::error::{DataFusionError, Result}; @@ -33,9 +38,6 @@ use datafusion::logical_expr::{ }; use pyo3::types::{PyList, PyTuple}; -use crate::expr::PyExpr; -use crate::utils::parse_volatility; - #[derive(Debug)] struct RustPartitionEvaluator { evaluator: PyObject, @@ -90,6 +92,7 @@ impl PartitionEvaluator for RustPartitionEvaluator { } fn evaluate_all(&mut self, values: &[ArrayRef], num_rows: usize) -> Result { + println!("evaluate all called with number of values {}", values.len()); Python::with_gil(|py| { let py_values = PyList::new_bound( py, @@ -299,11 +302,25 @@ impl WindowUDFImpl for MultiColumnWindowUDF { &self.signature } - fn return_type(&self, _arg_types: &[DataType]) -> Result { - Ok(self.return_type.clone()) + fn field(&self, field_args: WindowUDFFieldArgs) -> Result { + // TODO: Should nullable always be `true`? + Ok(arrow::datatypes::Field::new( + field_args.name(), + self.return_type.clone(), + true, + )) } - fn partition_evaluator(&self) -> Result> { + // TODO: Enable passing partition_evaluator_args to python? + fn partition_evaluator( + &self, + _partition_evaluator_args: PartitionEvaluatorArgs, + ) -> Result> { + let _ = _partition_evaluator_args; (self.partition_evaluator_factory)() } + + fn expressions(&self, expr_args: ExpressionArgs) -> Vec> { + expr_args.input_exprs().into() + } }