From 8f80aa5f6d662801f6d8d38cb1d4941604911cb5 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Wed, 27 Nov 2024 12:11:48 +0800 Subject: [PATCH 1/8] upgrade datafusion version and fix test --- wren-core-py/Cargo.lock | 237 +++++++++++------- wren-core/Cargo.toml | 5 +- .../sqllogictest/test_files/tpch/q7.slt.part | 2 +- .../sqllogictest/test_files/tpch/q8.slt.part | 2 +- .../sqllogictest/test_files/tpch/q9.slt.part | 2 +- 5 files changed, 153 insertions(+), 95 deletions(-) diff --git a/wren-core-py/Cargo.lock b/wren-core-py/Cargo.lock index 23e07dfd9..6839ba972 100644 --- a/wren-core-py/Cargo.lock +++ b/wren-core-py/Cargo.lock @@ -139,9 +139,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4caf25cdc4a985f91df42ed9e9308e1adbcd341a31a72605c697033fcef163e3" +checksum = "c91839b07e474b3995035fd8ac33ee54f9c9ccbbb1ea33d9909c71bffdf1259d" dependencies = [ "arrow-arith", "arrow-array", @@ -160,9 +160,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91f2dfd1a7ec0aca967dfaa616096aec49779adc8eccec005e2f5e4111b1192a" +checksum = "855c57c4efd26722b044dcd3e348252560e3e0333087fb9f6479dc0bf744054f" dependencies = [ "arrow-array", "arrow-buffer", @@ -175,9 +175,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d39387ca628be747394890a6e47f138ceac1aa912eab64f02519fed24b637af8" +checksum = "bd03279cea46569acf9295f6224fbc370c5df184b4d2ecfe97ccb131d5615a7f" dependencies = [ "ahash", "arrow-buffer", @@ -186,15 +186,15 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.14.5", + "hashbrown 0.15.1", "num", ] [[package]] name = "arrow-buffer" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e51e05228852ffe3eb391ce7178a0f97d2cf80cc6ef91d3c4a6b3cb688049ec" +checksum = "9e4a9b9b1d6d7117f6138e13bc4dd5daa7f94e671b70e8c9c4dc37b4f5ecfc16" dependencies = [ "bytes", "half", @@ -203,9 +203,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d09aea56ec9fa267f3f3f6cdab67d8a9974cbba90b3aa38c8fe9d0bb071bd8c1" +checksum = "bc70e39916e60c5b7af7a8e2719e3ae589326039e1e863675a008bee5ffe90fd" dependencies = [ "arrow-array", "arrow-buffer", @@ -224,9 +224,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c07b5232be87d115fde73e32f2ca7f1b353bff1b44ac422d3c6fc6ae38f11f0d" +checksum = "789b2af43c1049b03a8d088ff6b2257cdcea1756cd76b174b1f2600356771b97" dependencies = [ "arrow-array", "arrow-buffer", @@ -243,9 +243,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b98ae0af50890b494cebd7d6b04b35e896205c1d1df7b29a6272c5d0d0249ef5" +checksum = "e4e75edf21ffd53744a9b8e3ed11101f610e7ceb1a29860432824f1834a1f623" dependencies = [ "arrow-buffer", "arrow-schema", @@ -255,9 +255,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ed91bdeaff5a1c00d28d8f73466bcb64d32bbd7093b5a30156b4b9f4dba3eee" +checksum = "d186a909dece9160bf8312f5124d797884f608ef5435a36d9d608e0b2a9bcbf8" dependencies = [ "arrow-array", "arrow-buffer", @@ -270,9 +270,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0471f51260a5309307e5d409c9dc70aede1cd9cf1d4ff0f0a1e8e1a2dd0e0d3c" +checksum = "b66ff2fedc1222942d0bd2fd391cb14a85baa3857be95c9373179bd616753b85" dependencies = [ "arrow-array", "arrow-buffer", @@ -290,9 +290,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2883d7035e0b600fb4c30ce1e50e66e53d8656aa729f2bfa4b51d359cf3ded52" +checksum = "ece7b5bc1180e6d82d1a60e1688c199829e8842e38497563c3ab6ea813e527fd" dependencies = [ "arrow-array", "arrow-buffer", @@ -305,9 +305,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "552907e8e587a6fde4f8843fd7a27a576a260f65dab6c065741ea79f633fc5be" +checksum = "745c114c8f0e8ce211c83389270de6fbe96a9088a7b32c2a041258a443fe83ff" dependencies = [ "ahash", "arrow-array", @@ -319,15 +319,15 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "539ada65246b949bd99ffa0881a9a15a4a529448af1a07a9838dd78617dafab1" +checksum = "b95513080e728e4cec37f1ff5af4f12c9688d47795d17cda80b6ec2cf74d4678" [[package]] name = "arrow-select" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6259e566b752da6dceab91766ed8b2e67bf6270eb9ad8a6e07a33c1bede2b125" +checksum = "8e415279094ea70323c032c6e739c48ad8d80e78a09bef7117b8718ad5bf3722" dependencies = [ "ahash", "arrow-array", @@ -339,9 +339,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3179ccbd18ebf04277a095ba7321b93fd1f774f18816bd5f6b3ce2f594edb6c" +checksum = "11d956cae7002eb8d83a27dbd34daaea1cf5b75852f0b84deb4d93a276e92bbf" dependencies = [ "arrow-array", "arrow-buffer", @@ -734,8 +734,8 @@ dependencies = [ [[package]] name = "datafusion" -version = "42.2.0" -source = "git+https://github.com/apache/datafusion.git?rev=7c6f891b4b5a007e29fb3890ed5315ef916ae1d3#7c6f891b4b5a007e29fb3890ed5315ef916ae1d3" +version = "43.0.0" +source = "git+https://github.com/goldmedal/datafusion.git?branch=wren/support-array-struct#1862366b3543b7522e0601a7e29a7bd97adc599c" dependencies = [ "ahash", "arrow", @@ -790,8 +790,8 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "42.2.0" -source = "git+https://github.com/apache/datafusion.git?rev=7c6f891b4b5a007e29fb3890ed5315ef916ae1d3#7c6f891b4b5a007e29fb3890ed5315ef916ae1d3" +version = "43.0.0" +source = "git+https://github.com/goldmedal/datafusion.git?branch=wren/support-array-struct#1862366b3543b7522e0601a7e29a7bd97adc599c" dependencies = [ "arrow-schema", "async-trait", @@ -804,8 +804,8 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "42.2.0" -source = "git+https://github.com/apache/datafusion.git?rev=7c6f891b4b5a007e29fb3890ed5315ef916ae1d3#7c6f891b4b5a007e29fb3890ed5315ef916ae1d3" +version = "43.0.0" +source = "git+https://github.com/goldmedal/datafusion.git?branch=wren/support-array-struct#1862366b3543b7522e0601a7e29a7bd97adc599c" dependencies = [ "ahash", "arrow", @@ -816,29 +816,35 @@ dependencies = [ "half", "hashbrown 0.14.5", "indexmap 2.6.0", - "instant", "libc", "num_cpus", "object_store", "parquet", "paste", + "recursive", "sqlparser", "tokio", + "web-time", ] [[package]] name = "datafusion-common-runtime" -version = "42.2.0" -source = "git+https://github.com/apache/datafusion.git?rev=7c6f891b4b5a007e29fb3890ed5315ef916ae1d3#7c6f891b4b5a007e29fb3890ed5315ef916ae1d3" +version = "43.0.0" +source = "git+https://github.com/goldmedal/datafusion.git?branch=wren/support-array-struct#1862366b3543b7522e0601a7e29a7bd97adc599c" dependencies = [ "log", "tokio", ] +[[package]] +name = "datafusion-doc" +version = "43.0.0" +source = "git+https://github.com/goldmedal/datafusion.git?branch=wren/support-array-struct#1862366b3543b7522e0601a7e29a7bd97adc599c" + [[package]] name = "datafusion-execution" -version = "42.2.0" -source = "git+https://github.com/apache/datafusion.git?rev=7c6f891b4b5a007e29fb3890ed5315ef916ae1d3#7c6f891b4b5a007e29fb3890ed5315ef916ae1d3" +version = "43.0.0" +source = "git+https://github.com/goldmedal/datafusion.git?branch=wren/support-array-struct#1862366b3543b7522e0601a7e29a7bd97adc599c" dependencies = [ "arrow", "chrono", @@ -857,8 +863,8 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "42.2.0" -source = "git+https://github.com/apache/datafusion.git?rev=7c6f891b4b5a007e29fb3890ed5315ef916ae1d3#7c6f891b4b5a007e29fb3890ed5315ef916ae1d3" +version = "43.0.0" +source = "git+https://github.com/goldmedal/datafusion.git?branch=wren/support-array-struct#1862366b3543b7522e0601a7e29a7bd97adc599c" dependencies = [ "ahash", "arrow", @@ -866,12 +872,14 @@ dependencies = [ "arrow-buffer", "chrono", "datafusion-common", + "datafusion-doc", "datafusion-expr-common", "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr-common", "indexmap 2.6.0", "paste", + "recursive", "serde_json", "sqlparser", "strum", @@ -880,8 +888,8 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "42.2.0" -source = "git+https://github.com/apache/datafusion.git?rev=7c6f891b4b5a007e29fb3890ed5315ef916ae1d3#7c6f891b4b5a007e29fb3890ed5315ef916ae1d3" +version = "43.0.0" +source = "git+https://github.com/goldmedal/datafusion.git?branch=wren/support-array-struct#1862366b3543b7522e0601a7e29a7bd97adc599c" dependencies = [ "arrow", "datafusion-common", @@ -891,8 +899,8 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "42.2.0" -source = "git+https://github.com/apache/datafusion.git?rev=7c6f891b4b5a007e29fb3890ed5315ef916ae1d3#7c6f891b4b5a007e29fb3890ed5315ef916ae1d3" +version = "43.0.0" +source = "git+https://github.com/goldmedal/datafusion.git?branch=wren/support-array-struct#1862366b3543b7522e0601a7e29a7bd97adc599c" dependencies = [ "arrow", "arrow-buffer", @@ -901,8 +909,10 @@ dependencies = [ "blake3", "chrono", "datafusion-common", + "datafusion-doc", "datafusion-execution", "datafusion-expr", + "datafusion-macros", "hashbrown 0.14.5", "hex", "itertools", @@ -917,8 +927,8 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "42.2.0" -source = "git+https://github.com/apache/datafusion.git?rev=7c6f891b4b5a007e29fb3890ed5315ef916ae1d3#7c6f891b4b5a007e29fb3890ed5315ef916ae1d3" +version = "43.0.0" +source = "git+https://github.com/goldmedal/datafusion.git?branch=wren/support-array-struct#1862366b3543b7522e0601a7e29a7bd97adc599c" dependencies = [ "ahash", "arrow", @@ -937,8 +947,8 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "42.2.0" -source = "git+https://github.com/apache/datafusion.git?rev=7c6f891b4b5a007e29fb3890ed5315ef916ae1d3#7c6f891b4b5a007e29fb3890ed5315ef916ae1d3" +version = "43.0.0" +source = "git+https://github.com/goldmedal/datafusion.git?branch=wren/support-array-struct#1862366b3543b7522e0601a7e29a7bd97adc599c" dependencies = [ "ahash", "arrow", @@ -950,8 +960,8 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "42.2.0" -source = "git+https://github.com/apache/datafusion.git?rev=7c6f891b4b5a007e29fb3890ed5315ef916ae1d3#7c6f891b4b5a007e29fb3890ed5315ef916ae1d3" +version = "43.0.0" +source = "git+https://github.com/goldmedal/datafusion.git?branch=wren/support-array-struct#1862366b3543b7522e0601a7e29a7bd97adc599c" dependencies = [ "arrow", "arrow-array", @@ -972,8 +982,8 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "42.2.0" -source = "git+https://github.com/apache/datafusion.git?rev=7c6f891b4b5a007e29fb3890ed5315ef916ae1d3#7c6f891b4b5a007e29fb3890ed5315ef916ae1d3" +version = "43.0.0" +source = "git+https://github.com/goldmedal/datafusion.git?branch=wren/support-array-struct#1862366b3543b7522e0601a7e29a7bd97adc599c" dependencies = [ "datafusion-common", "datafusion-expr", @@ -986,17 +996,28 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "42.2.0" -source = "git+https://github.com/apache/datafusion.git?rev=7c6f891b4b5a007e29fb3890ed5315ef916ae1d3#7c6f891b4b5a007e29fb3890ed5315ef916ae1d3" +version = "43.0.0" +source = "git+https://github.com/goldmedal/datafusion.git?branch=wren/support-array-struct#1862366b3543b7522e0601a7e29a7bd97adc599c" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", ] +[[package]] +name = "datafusion-macros" +version = "43.0.0" +source = "git+https://github.com/goldmedal/datafusion.git?branch=wren/support-array-struct#1862366b3543b7522e0601a7e29a7bd97adc599c" +dependencies = [ + "datafusion-doc", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "datafusion-optimizer" -version = "42.2.0" -source = "git+https://github.com/apache/datafusion.git?rev=7c6f891b4b5a007e29fb3890ed5315ef916ae1d3#7c6f891b4b5a007e29fb3890ed5315ef916ae1d3" +version = "43.0.0" +source = "git+https://github.com/goldmedal/datafusion.git?branch=wren/support-array-struct#1862366b3543b7522e0601a7e29a7bd97adc599c" dependencies = [ "arrow", "async-trait", @@ -1009,13 +1030,15 @@ dependencies = [ "itertools", "log", "paste", + "recursive", + "regex", "regex-syntax", ] [[package]] name = "datafusion-physical-expr" -version = "42.2.0" -source = "git+https://github.com/apache/datafusion.git?rev=7c6f891b4b5a007e29fb3890ed5315ef916ae1d3#7c6f891b4b5a007e29fb3890ed5315ef916ae1d3" +version = "43.0.0" +source = "git+https://github.com/goldmedal/datafusion.git?branch=wren/support-array-struct#1862366b3543b7522e0601a7e29a7bd97adc599c" dependencies = [ "ahash", "arrow", @@ -1041,8 +1064,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "42.2.0" -source = "git+https://github.com/apache/datafusion.git?rev=7c6f891b4b5a007e29fb3890ed5315ef916ae1d3#7c6f891b4b5a007e29fb3890ed5315ef916ae1d3" +version = "43.0.0" +source = "git+https://github.com/goldmedal/datafusion.git?branch=wren/support-array-struct#1862366b3543b7522e0601a7e29a7bd97adc599c" dependencies = [ "ahash", "arrow", @@ -1055,23 +1078,26 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "42.2.0" -source = "git+https://github.com/apache/datafusion.git?rev=7c6f891b4b5a007e29fb3890ed5315ef916ae1d3#7c6f891b4b5a007e29fb3890ed5315ef916ae1d3" +version = "43.0.0" +source = "git+https://github.com/goldmedal/datafusion.git?branch=wren/support-array-struct#1862366b3543b7522e0601a7e29a7bd97adc599c" dependencies = [ "arrow", "arrow-schema", "datafusion-common", "datafusion-execution", "datafusion-expr-common", + "datafusion-optimizer", "datafusion-physical-expr", "datafusion-physical-plan", "itertools", + "log", + "recursive", ] [[package]] name = "datafusion-physical-plan" -version = "42.2.0" -source = "git+https://github.com/apache/datafusion.git?rev=7c6f891b4b5a007e29fb3890ed5315ef916ae1d3#7c6f891b4b5a007e29fb3890ed5315ef916ae1d3" +version = "43.0.0" +source = "git+https://github.com/goldmedal/datafusion.git?branch=wren/support-array-struct#1862366b3543b7522e0601a7e29a7bd97adc599c" dependencies = [ "ahash", "arrow", @@ -1104,8 +1130,8 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "42.2.0" -source = "git+https://github.com/apache/datafusion.git?rev=7c6f891b4b5a007e29fb3890ed5315ef916ae1d3#7c6f891b4b5a007e29fb3890ed5315ef916ae1d3" +version = "43.0.0" +source = "git+https://github.com/goldmedal/datafusion.git?branch=wren/support-array-struct#1862366b3543b7522e0601a7e29a7bd97adc599c" dependencies = [ "arrow", "arrow-array", @@ -1114,6 +1140,7 @@ dependencies = [ "datafusion-expr", "indexmap 2.6.0", "log", + "recursive", "regex", "sqlparser", "strum", @@ -1624,18 +1651,6 @@ version = "2.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" -[[package]] -name = "instant" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" -dependencies = [ - "cfg-if", - "js-sys", - "wasm-bindgen", - "web-sys", -] - [[package]] name = "integer-encoding" version = "3.0.4" @@ -2005,9 +2020,9 @@ dependencies = [ [[package]] name = "parquet" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dea02606ba6f5e856561d8d507dba8bac060aefca2a6c0f1aa1d361fed91ff3e" +checksum = "2b449890367085eb65d7d3321540abc3d7babbd179ce31df0016e90719114191" dependencies = [ "ahash", "arrow-array", @@ -2024,7 +2039,7 @@ dependencies = [ "flate2", "futures", "half", - "hashbrown 0.14.5", + "hashbrown 0.15.1", "lz4_flex", "num", "num-bigint", @@ -2166,6 +2181,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "psm" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "200b9ff220857e53e184257720a14553b2f4aa02577d2ed9842d45d4b9654810" +dependencies = [ + "cc", +] + [[package]] name = "pyo3" version = "0.21.2" @@ -2278,6 +2302,26 @@ dependencies = [ "getrandom", ] +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn", +] + [[package]] name = "redox_syscall" version = "0.5.7" @@ -2512,9 +2556,8 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "sqlparser" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fe11944a61da0da3f592e19a45ebe5ab92dc14a779907ff1f08fbb797bfefc7" +version = "0.52.0" +source = "git+https://github.com/goldmedal/sqlparser-rs.git?branch=wren/0.12.3-array-struct#bb1833fe81508ac84be0b757d0b52394f3c97a6f" dependencies = [ "log", "sqlparser_derive", @@ -2523,8 +2566,7 @@ dependencies = [ [[package]] name = "sqlparser_derive" version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" +source = "git+https://github.com/goldmedal/sqlparser-rs.git?branch=wren/0.12.3-array-struct#bb1833fe81508ac84be0b757d0b52394f3c97a6f" dependencies = [ "proc-macro2", "quote", @@ -2537,6 +2579,19 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "stacker" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "799c883d55abdb5e98af1a7b3f23b9b6de8ecada0ecac058672d7635eb48ca7b" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys 0.52.0", +] + [[package]] name = "static_assertions" version = "1.1.0" @@ -2922,10 +2977,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" [[package]] -name = "web-sys" -version = "0.3.72" +name = "web-time" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" dependencies = [ "js-sys", "wasm-bindgen", diff --git a/wren-core/Cargo.toml b/wren-core/Cargo.toml index a705546ff..bc7dcc037 100644 --- a/wren-core/Cargo.toml +++ b/wren-core/Cargo.toml @@ -16,7 +16,9 @@ version = "0.1.0" async-trait = "0.1.80" # We require the following commits # https://github.com/apache/datafusion/pull/13241 -datafusion = { git = "https://github.com/apache/datafusion.git", rev = "7c6f891b4b5a007e29fb3890ed5315ef916ae1d3" } +# https://github.com/goldmedal/datafusion/pull/1 +# https://github.com/goldmedal/sqlparser-rs/pull/1 +datafusion = { git = "https://github.com/goldmedal/datafusion.git", branch = "wren/support-array-struct" } env_logger = "0.11.3" log = { version = "0.4.14" } serde = { version = "1.0.201", features = ["derive", "rc"] } @@ -24,3 +26,4 @@ serde_json = { version = "1.0.117" } serde_with = { version = "3.11.0" } tokio = { version = "1.4.0", features = ["rt", "rt-multi-thread", "macros"] } wren-core = { path = "core" } +hashbrown = "0.15.2" diff --git a/wren-core/sqllogictest/test_files/tpch/q7.slt.part b/wren-core/sqllogictest/test_files/tpch/q7.slt.part index a62aaf972..c806bafa5 100644 --- a/wren-core/sqllogictest/test_files/tpch/q7.slt.part +++ b/wren-core/sqllogictest/test_files/tpch/q7.slt.part @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. -query TTRR +query TTIR select supp_nation, cust_nation, diff --git a/wren-core/sqllogictest/test_files/tpch/q8.slt.part b/wren-core/sqllogictest/test_files/tpch/q8.slt.part index bdd6b1669..27f907780 100644 --- a/wren-core/sqllogictest/test_files/tpch/q8.slt.part +++ b/wren-core/sqllogictest/test_files/tpch/q8.slt.part @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. -query RR +query IR select o_year, cast(cast(sum(case diff --git a/wren-core/sqllogictest/test_files/tpch/q9.slt.part b/wren-core/sqllogictest/test_files/tpch/q9.slt.part index 0e3e7aafc..94ed4845b 100644 --- a/wren-core/sqllogictest/test_files/tpch/q9.slt.part +++ b/wren-core/sqllogictest/test_files/tpch/q9.slt.part @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. -query TRR +query TIR select nation, o_year, From ef3bcda1e56aa3adc98b1c734e18c1ac93ee2d55 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Wed, 27 Nov 2024 14:22:15 +0800 Subject: [PATCH 2/8] fix test --- wren-core/core/src/mdl/dialect.rs | 15 ++++++--------- wren-core/core/src/mdl/mod.rs | 19 ++++++++++++++++--- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/wren-core/core/src/mdl/dialect.rs b/wren-core/core/src/mdl/dialect.rs index a060b9ba1..9e5e47793 100644 --- a/wren-core/core/src/mdl/dialect.rs +++ b/wren-core/core/src/mdl/dialect.rs @@ -142,18 +142,15 @@ impl WrenDialect { return internal_err!("get_fields must have exactly 2 argument"); } - let mut exprs = match unparser.expr_to_sql(&args[0])? { - ast::Expr::CompoundIdentifier(exprs) => exprs, - ast::Expr::Identifier(ident) => vec![ident], - // If the first argument is not identifiers, unparse it as ScalarFunction - _ => return Ok(None), - }; - + let sql = unparser.expr_to_sql(&args[0])?; if let ast::Expr::Value(Value::SingleQuotedString(field_name)) = unparser.expr_to_sql(&args[1])? { - exprs.extend(vec![self.new_ident_quoted_if_needs(field_name)]); - return Ok(Some(ast::Expr::CompoundIdentifier(exprs))); + let key = self.new_ident_quoted_if_needs(field_name); + return Ok(Some(ast::Expr::CompositeAccess { + expr: Box::new(sql), + key, + })); } Ok(None) diff --git a/wren-core/core/src/mdl/mod.rs b/wren-core/core/src/mdl/mod.rs index 10ffd41c6..9bea8f357 100644 --- a/wren-core/core/src/mdl/mod.rs +++ b/wren-core/core/src/mdl/mod.rs @@ -415,9 +415,9 @@ mod test { use crate::mdl::function::RemoteFunction; use crate::mdl::manifest::Manifest; use crate::mdl::{self, transform_sql_with_ctx, AnalyzedWrenMDL}; - use datafusion::arrow::array::{ - ArrayRef, Int64Array, RecordBatch, StringArray, TimestampNanosecondArray, - }; + use datafusion::arrow::array::{Array, ArrayData, ArrayRef, Float64Array, Int64Array, ListArray, RecordBatch, StringArray, StructArray, TimestampNanosecondArray}; + use datafusion::arrow::buffer::Buffer; + use datafusion::arrow::datatypes::{DataType, Field, Fields, TimeUnit}; use datafusion::assert_batches_eq; use datafusion::common::not_impl_err; use datafusion::common::Result; @@ -1134,6 +1134,12 @@ mod test { ) .build(), ) + .column( + ColumnBuilder::new( + "struct_array_col", + "array>", + ).build(), + ) .build(), ) .build(); @@ -1145,6 +1151,13 @@ mod test { (SELECT struct_table.struct_col FROM (SELECT struct_table.struct_col AS struct_col \ FROM struct_table) AS struct_table) AS struct_table"); + + let sql = "select struct_array_col[1].float_field from wren.test.struct_table"; + let actual = transform_sql_with_ctx(&ctx, Arc::clone(&analyzed_mdl), &[], sql).await?; + assert_eq!(actual, "SELECT struct_table.struct_array_col[1].float_field FROM \ + (SELECT struct_table.struct_array_col FROM (SELECT struct_table.struct_array_col AS struct_array_col \ + FROM struct_table) AS struct_table) AS struct_table"); + let sql = "select {float_field: 1.0, time_field: timestamp '2021-01-01 00:00:00'}"; let actual = From 5769c3165364659f9dad027c9b040bb4c39a1502 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Wed, 27 Nov 2024 14:47:04 +0800 Subject: [PATCH 3/8] upgrade type signature --- wren-core/core/src/mdl/function.rs | 6 +++--- wren-core/core/src/mdl/mod.rs | 4 +--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/wren-core/core/src/mdl/function.rs b/wren-core/core/src/mdl/function.rs index 560933a54..13c6b122d 100644 --- a/wren-core/core/src/mdl/function.rs +++ b/wren-core/core/src/mdl/function.rs @@ -73,7 +73,7 @@ impl ByPassScalarUDF { signature: Signature::one_of( vec![ TypeSignature::VariadicAny, - TypeSignature::Uniform(0, vec![]), + TypeSignature::NullAry, ], Volatility::Volatile, ), @@ -120,7 +120,7 @@ impl ByPassAggregateUDF { signature: Signature::one_of( vec![ TypeSignature::VariadicAny, - TypeSignature::Uniform(0, vec![]), + TypeSignature::NullAry, ], Volatility::Volatile, ), @@ -167,7 +167,7 @@ impl ByPassWindowFunction { signature: Signature::one_of( vec![ TypeSignature::VariadicAny, - TypeSignature::Uniform(0, vec![]), + TypeSignature::NullAry, ], Volatility::Volatile, ), diff --git a/wren-core/core/src/mdl/mod.rs b/wren-core/core/src/mdl/mod.rs index 9bea8f357..8f783717f 100644 --- a/wren-core/core/src/mdl/mod.rs +++ b/wren-core/core/src/mdl/mod.rs @@ -415,9 +415,7 @@ mod test { use crate::mdl::function::RemoteFunction; use crate::mdl::manifest::Manifest; use crate::mdl::{self, transform_sql_with_ctx, AnalyzedWrenMDL}; - use datafusion::arrow::array::{Array, ArrayData, ArrayRef, Float64Array, Int64Array, ListArray, RecordBatch, StringArray, StructArray, TimestampNanosecondArray}; - use datafusion::arrow::buffer::Buffer; - use datafusion::arrow::datatypes::{DataType, Field, Fields, TimeUnit}; + use datafusion::arrow::array::{ArrayRef, Int64Array, RecordBatch, StringArray, TimestampNanosecondArray}; use datafusion::assert_batches_eq; use datafusion::common::not_impl_err; use datafusion::common::Result; From a7df53fd42bbd978bda8ab7f348f2184616c495d Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Wed, 27 Nov 2024 14:53:07 +0800 Subject: [PATCH 4/8] fix fmt --- wren-core/core/src/mdl/function.rs | 15 +++------------ wren-core/core/src/mdl/mod.rs | 13 ++++++++----- 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/wren-core/core/src/mdl/function.rs b/wren-core/core/src/mdl/function.rs index 13c6b122d..d1505ddde 100644 --- a/wren-core/core/src/mdl/function.rs +++ b/wren-core/core/src/mdl/function.rs @@ -71,10 +71,7 @@ impl ByPassScalarUDF { name: name.to_string(), return_type, signature: Signature::one_of( - vec![ - TypeSignature::VariadicAny, - TypeSignature::NullAry, - ], + vec![TypeSignature::VariadicAny, TypeSignature::NullAry], Volatility::Volatile, ), } @@ -118,10 +115,7 @@ impl ByPassAggregateUDF { name: name.to_string(), return_type, signature: Signature::one_of( - vec![ - TypeSignature::VariadicAny, - TypeSignature::NullAry, - ], + vec![TypeSignature::VariadicAny, TypeSignature::NullAry], Volatility::Volatile, ), } @@ -165,10 +159,7 @@ impl ByPassWindowFunction { name: name.to_string(), return_type, signature: Signature::one_of( - vec![ - TypeSignature::VariadicAny, - TypeSignature::NullAry, - ], + vec![TypeSignature::VariadicAny, TypeSignature::NullAry], Volatility::Volatile, ), } diff --git a/wren-core/core/src/mdl/mod.rs b/wren-core/core/src/mdl/mod.rs index 8f783717f..74685974d 100644 --- a/wren-core/core/src/mdl/mod.rs +++ b/wren-core/core/src/mdl/mod.rs @@ -415,7 +415,9 @@ mod test { use crate::mdl::function::RemoteFunction; use crate::mdl::manifest::Manifest; use crate::mdl::{self, transform_sql_with_ctx, AnalyzedWrenMDL}; - use datafusion::arrow::array::{ArrayRef, Int64Array, RecordBatch, StringArray, TimestampNanosecondArray}; + use datafusion::arrow::array::{ + ArrayRef, Int64Array, RecordBatch, StringArray, TimestampNanosecondArray, + }; use datafusion::assert_batches_eq; use datafusion::common::not_impl_err; use datafusion::common::Result; @@ -1136,7 +1138,8 @@ mod test { ColumnBuilder::new( "struct_array_col", "array>", - ).build(), + ) + .build(), ) .build(), ) @@ -1149,9 +1152,9 @@ mod test { (SELECT struct_table.struct_col FROM (SELECT struct_table.struct_col AS struct_col \ FROM struct_table) AS struct_table) AS struct_table"); - - let sql = "select struct_array_col[1].float_field from wren.test.struct_table"; - let actual = transform_sql_with_ctx(&ctx, Arc::clone(&analyzed_mdl), &[], sql).await?; + let sql = "select struct_array_col[1].float_field from wren.test.struct_table"; + let actual = + transform_sql_with_ctx(&ctx, Arc::clone(&analyzed_mdl), &[], sql).await?; assert_eq!(actual, "SELECT struct_table.struct_array_col[1].float_field FROM \ (SELECT struct_table.struct_array_col FROM (SELECT struct_table.struct_array_col AS struct_array_col \ FROM struct_table) AS struct_table) AS struct_table"); From 2db41a815d27d7ba168a9936b5adf640849da798 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Wed, 27 Nov 2024 15:18:57 +0800 Subject: [PATCH 5/8] fix python test --- wren-core-py/tests/test_modeling_core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wren-core-py/tests/test_modeling_core.py b/wren-core-py/tests/test_modeling_core.py index f94357334..f891b6941 100644 --- a/wren-core-py/tests/test_modeling_core.py +++ b/wren-core-py/tests/test_modeling_core.py @@ -46,14 +46,14 @@ def test_read_function_list(): path = "tests/functions.csv" session_context = SessionContext(manifest_str, path) functions = session_context.get_available_functions() - assert len(functions) == 271 + assert len(functions) == 272 rewritten_sql = session_context.transform_sql("SELECT add_two(c_custkey) FROM my_catalog.my_schema.customer") assert rewritten_sql == 'SELECT add_two(customer.c_custkey) FROM (SELECT customer.c_custkey FROM (SELECT customer.c_custkey AS c_custkey FROM main.customer) AS customer) AS customer' session_context = SessionContext(manifest_str, None) functions = session_context.get_available_functions() - assert len(functions) == 269 + assert len(functions) == 270 def test_get_available_functions(): From 887d282f2ba47a4c7f540064eaf0b49ca092fe41 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Wed, 27 Nov 2024 15:25:26 +0800 Subject: [PATCH 6/8] taplo fmt --- wren-core/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wren-core/Cargo.toml b/wren-core/Cargo.toml index bc7dcc037..5ff81c851 100644 --- a/wren-core/Cargo.toml +++ b/wren-core/Cargo.toml @@ -20,10 +20,10 @@ async-trait = "0.1.80" # https://github.com/goldmedal/sqlparser-rs/pull/1 datafusion = { git = "https://github.com/goldmedal/datafusion.git", branch = "wren/support-array-struct" } env_logger = "0.11.3" +hashbrown = "0.15.2" log = { version = "0.4.14" } serde = { version = "1.0.201", features = ["derive", "rc"] } serde_json = { version = "1.0.117" } serde_with = { version = "3.11.0" } tokio = { version = "1.4.0", features = ["rt", "rt-multi-thread", "macros"] } wren-core = { path = "core" } -hashbrown = "0.15.2" From 3b2bf42551db3c2eb77a3aae342f2204f627d69a Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Wed, 27 Nov 2024 15:44:42 +0800 Subject: [PATCH 7/8] update datafusion function number --- ibis-server/tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibis-server/tests/conftest.py b/ibis-server/tests/conftest.py index 4d671fb08..6fae2f0bd 100644 --- a/ibis-server/tests/conftest.py +++ b/ibis-server/tests/conftest.py @@ -5,4 +5,4 @@ def file_path(path: str) -> str: return os.path.join(os.path.dirname(__file__), path) -DATAFUSION_FUNCTION_COUNT = 269 +DATAFUSION_FUNCTION_COUNT = 270 From 731baa437ecf103d30ea4d7811d0b3a1d5d1c156 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Wed, 27 Nov 2024 16:00:48 +0800 Subject: [PATCH 8/8] update the function count --- .../tests/routers/v3/connector/bigquery/test_functions.py | 2 +- .../tests/routers/v3/connector/postgres/test_functions.py | 2 +- .../tests/routers/v3/connector/snowflake/test_functions.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ibis-server/tests/routers/v3/connector/bigquery/test_functions.py b/ibis-server/tests/routers/v3/connector/bigquery/test_functions.py index 477a8b8c5..61bcc1990 100644 --- a/ibis-server/tests/routers/v3/connector/bigquery/test_functions.py +++ b/ibis-server/tests/routers/v3/connector/bigquery/test_functions.py @@ -47,7 +47,7 @@ def test_function_list(): response = client.get(url=f"{base_url}/functions") assert response.status_code == 200 result = response.json() - assert len(result) == DATAFUSION_FUNCTION_COUNT + 34 + assert len(result) == DATAFUSION_FUNCTION_COUNT + 33 the_func = next(filter(lambda x: x["name"] == "abs", result)) assert the_func == { "name": "abs", diff --git a/ibis-server/tests/routers/v3/connector/postgres/test_functions.py b/ibis-server/tests/routers/v3/connector/postgres/test_functions.py index a65872dbe..1d661fca4 100644 --- a/ibis-server/tests/routers/v3/connector/postgres/test_functions.py +++ b/ibis-server/tests/routers/v3/connector/postgres/test_functions.py @@ -57,7 +57,7 @@ def test_function_list(): response = client.get(url=f"{base_url}/functions") assert response.status_code == 200 result = response.json() - assert len(result) == DATAFUSION_FUNCTION_COUNT + 49 + assert len(result) == DATAFUSION_FUNCTION_COUNT + 48 the_func = next(filter(lambda x: x["name"] == "abs", result)) assert the_func == { "name": "abs", diff --git a/ibis-server/tests/routers/v3/connector/snowflake/test_functions.py b/ibis-server/tests/routers/v3/connector/snowflake/test_functions.py index 51db30c46..e4c6d9c2b 100644 --- a/ibis-server/tests/routers/v3/connector/snowflake/test_functions.py +++ b/ibis-server/tests/routers/v3/connector/snowflake/test_functions.py @@ -57,7 +57,7 @@ def test_function_list(): response = client.get(url=f"{base_url}/functions") assert response.status_code == 200 result = response.json() - assert len(result) == DATAFUSION_FUNCTION_COUNT + 71 + assert len(result) == DATAFUSION_FUNCTION_COUNT + 70 the_func = next(filter(lambda x: x["name"] == "abs", result)) assert the_func == { "name": "abs",