From 66bdcd208a6548c555748ad84d11a25fb74a1267 Mon Sep 17 00:00:00 2001 From: Albert Skalt Date: Thu, 24 Jul 2025 17:17:34 +0300 Subject: [PATCH] Bump arrow-rs, parquet to `54.0.0` and pyo3 to `0.23.3` --- Cargo.toml | 18 +-- datafusion-cli/Cargo.lock | 104 +++++++++--------- datafusion-cli/Cargo.toml | 4 +- datafusion/common/Cargo.toml | 2 +- datafusion/sqllogictest/test_files/dates.slt | 2 +- .../sqllogictest/test_files/timestamps.slt | 48 ++++---- 6 files changed, 92 insertions(+), 86 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a7b2727bd882..4e4922412dee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -69,22 +69,22 @@ version = "42.2.0" ahash = { version = "0.8", default-features = false, features = [ "runtime-rng", ] } -arrow = { version = "53.1.0", features = [ +arrow = { version = "54.0.0", features = [ "prettyprint", ] } -arrow-array = { version = "53.1.0", default-features = false, features = [ +arrow-array = { version = "54.0.0", default-features = false, features = [ "chrono-tz", ] } -arrow-buffer = { version = "53.1.0", default-features = false } -arrow-flight = { version = "53.1.0", features = [ +arrow-buffer = { version = "54.0.0", default-features = false } +arrow-flight = { version = "54.0.0", features = [ "flight-sql-experimental", ] } -arrow-ipc = { version = "53.1.0", default-features = false, features = [ +arrow-ipc = { version = "54.0.0", default-features = false, features = [ "lz4", ] } -arrow-ord = { version = "53.1.0", default-features = false } -arrow-schema = { version = "53.1.0", default-features = false } -arrow-string = { version = "53.1.0", default-features = false } +arrow-ord = { version = "54.0.0", default-features = false } +arrow-schema = { version = "54.0.0", default-features = false } +arrow-string = { version = "54.0.0", default-features = false } async-trait = "0.1.73" bigdecimal = "=0.4.1" bytes = "1.4" @@ -124,7 +124,7 @@ log = "^0.4" num_cpus = "1.13.0" object_store = { version = "0.11.0", default-features = false } parking_lot = "0.12" -parquet = { version = "53.1.0", default-features = false, features = [ +parquet = { version = "54.0.0", default-features = false, features = [ "arrow", "async", "object_store", diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 5599084f68ce..93919cb73bf1 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -173,9 +173,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "53.2.0" +version = "54.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4caf25cdc4a985f91df42ed9e9308e1adbcd341a31a72605c697033fcef163e3" +checksum = "b5ec52ba94edeed950e4a41f75d35376df196e8cb04437f7280a5aa49f20f796" dependencies = [ "arrow-arith", "arrow-array", @@ -194,24 +194,23 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "53.2.0" +version = "54.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91f2dfd1a7ec0aca967dfaa616096aec49779adc8eccec005e2f5e4111b1192a" +checksum = "8fc766fdacaf804cb10c7c70580254fcdb5d55cdfda2bc57b02baf5223a3af9e" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", - "half", "num", ] [[package]] name = "arrow-array" -version = "53.2.0" +version = "54.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d39387ca628be747394890a6e47f138ceac1aa912eab64f02519fed24b637af8" +checksum = "a12fcdb3f1d03f69d3ec26ac67645a8fe3f878d77b5ebb0b15d64a116c212985" dependencies = [ "ahash", "arrow-buffer", @@ -220,15 +219,15 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.14.5", + "hashbrown 0.15.4", "num", ] [[package]] name = "arrow-buffer" -version = "53.2.0" +version = "54.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e51e05228852ffe3eb391ce7178a0f97d2cf80cc6ef91d3c4a6b3cb688049ec" +checksum = "263f4801ff1839ef53ebd06f99a56cecd1dbaf314ec893d93168e2e860e0291c" dependencies = [ "bytes", "half", @@ -237,9 +236,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "53.2.0" +version = "54.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d09aea56ec9fa267f3f3f6cdab67d8a9974cbba90b3aa38c8fe9d0bb071bd8c1" +checksum = "ede6175fbc039dfc946a61c1b6d42fd682fcecf5ab5d148fbe7667705798cac9" dependencies = [ "arrow-array", "arrow-buffer", @@ -258,28 +257,25 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "53.2.0" +version = "54.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c07b5232be87d115fde73e32f2ca7f1b353bff1b44ac422d3c6fc6ae38f11f0d" +checksum = "1644877d8bc9a0ef022d9153dc29375c2bda244c39aec05a91d0e87ccf77995f" dependencies = [ "arrow-array", - "arrow-buffer", "arrow-cast", - "arrow-data", "arrow-schema", "chrono", "csv", "csv-core", "lazy_static", - "lexical-core", "regex", ] [[package]] name = "arrow-data" -version = "53.2.0" +version = "54.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b98ae0af50890b494cebd7d6b04b35e896205c1d1df7b29a6272c5d0d0249ef5" +checksum = "61cfdd7d99b4ff618f167e548b2411e5dd2c98c0ddebedd7df433d34c20a4429" dependencies = [ "arrow-buffer", "arrow-schema", @@ -289,13 +285,12 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "53.2.0" +version = "54.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ed91bdeaff5a1c00d28d8f73466bcb64d32bbd7093b5a30156b4b9f4dba3eee" +checksum = "62ff528658b521e33905334723b795ee56b393dbe9cf76c8b1f64b648c65a60c" dependencies = [ "arrow-array", "arrow-buffer", - "arrow-cast", "arrow-data", "arrow-schema", "flatbuffers", @@ -304,9 +299,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "53.2.0" +version = "54.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0471f51260a5309307e5d409c9dc70aede1cd9cf1d4ff0f0a1e8e1a2dd0e0d3c" +checksum = "0ee5b4ca98a7fb2efb9ab3309a5d1c88b5116997ff93f3147efdc1062a6158e9" dependencies = [ "arrow-array", "arrow-buffer", @@ -317,33 +312,32 @@ dependencies = [ "half", "indexmap", "lexical-core", + "memchr", "num", "serde", "serde_json", + "simdutf8", ] [[package]] name = "arrow-ord" -version = "53.2.0" +version = "54.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2883d7035e0b600fb4c30ce1e50e66e53d8656aa729f2bfa4b51d359cf3ded52" +checksum = "f0a3334a743bd2a1479dbc635540617a3923b4b2f6870f37357339e6b5363c21" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "arrow-select", - "half", - "num", ] [[package]] name = "arrow-row" -version = "53.2.0" +version = "54.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "552907e8e587a6fde4f8843fd7a27a576a260f65dab6c065741ea79f633fc5be" +checksum = "8d1d7a7291d2c5107e92140f75257a99343956871f3d3ab33a7b41532f79cb68" dependencies = [ - "ahash", "arrow-array", "arrow-buffer", "arrow-data", @@ -353,15 +347,15 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "53.2.0" +version = "54.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "539ada65246b949bd99ffa0881a9a15a4a529448af1a07a9838dd78617dafab1" +checksum = "39cfaf5e440be44db5413b75b72c2a87c1f8f0627117d110264048f2969b99e9" [[package]] name = "arrow-select" -version = "53.2.0" +version = "54.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6259e566b752da6dceab91766ed8b2e67bf6270eb9ad8a6e07a33c1bede2b125" +checksum = "69efcd706420e52cd44f5c4358d279801993846d1c2a8e52111853d61d55a619" dependencies = [ "ahash", "arrow-array", @@ -373,9 +367,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "53.2.0" +version = "54.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3179ccbd18ebf04277a095ba7321b93fd1f774f18816bd5f6b3ce2f594edb6c" +checksum = "a21546b337ab304a32cfc0770f671db7411787586b45b78b4593ae78e64e2b03" dependencies = [ "arrow-array", "arrow-buffer", @@ -946,15 +940,15 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.38" +version = "0.4.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" dependencies = [ "android-tzdata", "iana-time-zone", "num-traits", "serde", - "windows-targets 0.52.6", + "windows-link", ] [[package]] @@ -1707,9 +1701,9 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] name = "flatbuffers" -version = "24.3.25" +version = "24.12.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8add37afff2d4ffa83bc748a70b4b1370984f6980768554182424ef71447c35f" +checksum = "4f1baf0dbf96932ec9a3038d57900329c015b0bfb7b63d904f3bc27e2b02a096" dependencies = [ "bitflags 1.3.2", "rustc_version", @@ -1938,9 +1932,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.0" +version = "0.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" +checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" [[package]] name = "heck" @@ -2195,7 +2189,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" dependencies = [ "equivalent", - "hashbrown 0.15.0", + "hashbrown 0.15.4", ] [[package]] @@ -2696,9 +2690,9 @@ dependencies = [ [[package]] name = "parquet" -version = "53.2.0" +version = "54.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dea02606ba6f5e856561d8d507dba8bac060aefca2a6c0f1aa1d361fed91ff3e" +checksum = "bfb15796ac6f56b429fd99e33ba133783ad75b27c36b4b5ce06f1f82cc97754e" dependencies = [ "ahash", "arrow-array", @@ -2715,19 +2709,19 @@ dependencies = [ "flate2", "futures", "half", - "hashbrown 0.14.5", + "hashbrown 0.15.4", "lz4_flex", "num", "num-bigint", "object_store", "paste", "seq-macro", + "simdutf8", "snap", "thrift", "tokio", "twox-hash", "zstd 0.13.2", - "zstd-sys", ] [[package]] @@ -3454,6 +3448,12 @@ dependencies = [ "libc", ] +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + [[package]] name = "siphasher" version = "0.3.11" @@ -4107,6 +4107,12 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-link" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + [[package]] name = "windows-registry" version = "0.2.0" diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index f1420ecf53dc..3b164b3ea1df 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -29,7 +29,7 @@ rust-version = "1.82.0" readme = "README.md" [dependencies] -arrow = { version = "53.1.0" } +arrow = { version = "54.0.0" } async-trait = "0.1.73" aws-config = "1.5.5" # begin pin aws-sdk crates otherwise CI MSRV check fails @@ -57,7 +57,7 @@ futures = "0.3" mimalloc = { version = "0.1", default-features = false } object_store = { version = "0.11.0", features = ["aws", "gcp", "http"] } parking_lot = { version = "0.12" } -parquet = { version = "53.1.0", default-features = false } +parquet = { version = "54.0.0", default-features = false } regex = "1.8" rustyline = "14.0" tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot", "signal"] } diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 1ac27b40c219..fac3900010af 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -61,7 +61,7 @@ num_cpus = { workspace = true } object_store = { workspace = true, optional = true } parquet = { workspace = true, optional = true, default-features = true } paste = "1.0.15" -pyo3 = { version = "0.22.0", optional = true } +pyo3 = { version = "0.23.3", optional = true } sqlparser = { workspace = true } tokio = { workspace = true } diff --git a/datafusion/sqllogictest/test_files/dates.slt b/datafusion/sqllogictest/test_files/dates.slt index 3950a165a004..90892d2770b2 100644 --- a/datafusion/sqllogictest/test_files/dates.slt +++ b/datafusion/sqllogictest/test_files/dates.slt @@ -183,7 +183,7 @@ query error input contains invalid characters SELECT to_date('2020-09-08 12/00/00+00:00', '%c', '%+') # to_date with broken formatting -query error bad or unsupported format string +query error Error parsing timestamp SELECT to_date('2020-09-08 12/00/00+00:00', '%q') statement ok diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index 4b11e338da70..89ec8d96c210 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -1998,7 +1998,7 @@ SET TIME ZONE = '+05:00' statement ok CREATE TABLE foo (time TIMESTAMPTZ) AS VALUES - ('2020-01-01T00:00:00+05:00'), + ('2020-01-01T00:00:00+05:00'), ('2020-01-01T01:00:00+05:00'), ('2020-01-01T02:00:00+05:00'), ('2020-01-01T03:00:00+05:00') @@ -2045,7 +2045,7 @@ Timestamp(Microsecond, None) query T select arrow_typeof(date_trunc('microsecond', to_timestamp(61))) ---- -Timestamp(Nanosecond, None) +Timestamp(Nanosecond, None) # check date_bin query P @@ -2083,17 +2083,17 @@ NULL 1970-01-01T00:00:00 2031-01-19T23:33:25 1970-01-01T00:00:01 1969-12-31T23:5 # verify timestamp syntax styles are consistent query BBBBBBBBBBBBB SELECT to_timestamp(null) is null as c1, - null::timestamp is null as c2, - cast(null as timestamp) is null as c3, - to_timestamp(0) = 0::timestamp as c4, - to_timestamp(1926632005) = 1926632005::timestamp as c5, - to_timestamp(1) = 1::timestamp as c6, - to_timestamp(-1) = -1::timestamp as c7, + null::timestamp is null as c2, + cast(null as timestamp) is null as c3, + to_timestamp(0) = 0::timestamp as c4, + to_timestamp(1926632005) = 1926632005::timestamp as c5, + to_timestamp(1) = 1::timestamp as c6, + to_timestamp(-1) = -1::timestamp as c7, to_timestamp(0-1) = (0-1)::timestamp as c8, - to_timestamp(0) = cast(0 as timestamp) as c9, - to_timestamp(1926632005) = cast(1926632005 as timestamp) as c10, - to_timestamp(1) = cast(1 as timestamp) as c11, - to_timestamp(-1) = cast(-1 as timestamp) as c12, + to_timestamp(0) = cast(0 as timestamp) as c9, + to_timestamp(1926632005) = cast(1926632005 as timestamp) as c10, + to_timestamp(1) = cast(1 as timestamp) as c11, + to_timestamp(-1) = cast(-1 as timestamp) as c12, to_timestamp(0-1) = cast(0-1 as timestamp) as c13 ---- true true true true true true true true true true true true true @@ -2106,10 +2106,10 @@ Timestamp(Nanosecond, None) Timestamp(Nanosecond, None) Timestamp(Nanosecond, No # verify timestamp output types using timestamp literal syntax query BBBBBB -SELECT arrow_typeof(to_timestamp(1)) = arrow_typeof(1::timestamp) as c1, +SELECT arrow_typeof(to_timestamp(1)) = arrow_typeof(1::timestamp) as c1, arrow_typeof(to_timestamp(null)) = arrow_typeof(null::timestamp) as c2, arrow_typeof(to_timestamp('2023-01-10 12:34:56.000')) = arrow_typeof('2023-01-10 12:34:56.000'::timestamp) as c3, - arrow_typeof(to_timestamp(1)) = arrow_typeof(cast(1 as timestamp)) as c4, + arrow_typeof(to_timestamp(1)) = arrow_typeof(cast(1 as timestamp)) as c4, arrow_typeof(to_timestamp(null)) = arrow_typeof(cast(null as timestamp)) as c5, arrow_typeof(to_timestamp('2023-01-10 12:34:56.000')) = arrow_typeof(cast('2023-01-10 12:34:56.000' as timestamp)) as c6 ---- @@ -2161,23 +2161,23 @@ query error input contains invalid characters SELECT to_timestamp_seconds('2020-09-08 12/00/00+00:00', '%c', '%+') # to_timestamp with broken formatting -query error bad or unsupported format string +query error Error parsing timestamp SELECT to_timestamp('2020-09-08 12/00/00+00:00', '%q') # to_timestamp_nanos with broken formatting -query error bad or unsupported format string +query error Error parsing timestamp SELECT to_timestamp_nanos('2020-09-08 12/00/00+00:00', '%q') # to_timestamp_millis with broken formatting -query error bad or unsupported format string +query error Error parsing timestamp SELECT to_timestamp_millis('2020-09-08 12/00/00+00:00', '%q') # to_timestamp_micros with broken formatting -query error bad or unsupported format string +query error Error parsing timestamp SELECT to_timestamp_micros('2020-09-08 12/00/00+00:00', '%q') # to_timestamp_seconds with broken formatting -query error bad or unsupported format string +query error Error parsing timestamp SELECT to_timestamp_seconds('2020-09-08 12/00/00+00:00', '%q') # Create string timestamp table with different formats @@ -2266,13 +2266,13 @@ drop table table_a ########## statement ok -create table table_a (ts timestamp) as values - ('2020-09-08T11:42:29Z'::timestamp), +create table table_a (ts timestamp) as values + ('2020-09-08T11:42:29Z'::timestamp), ('2020-09-08T12:42:29Z'::timestamp), ('2020-09-08T13:42:29Z'::timestamp) statement ok -create table table_b (ts timestamp) as values +create table table_b (ts timestamp) as values ('2020-09-08T11:42:29.190Z'::timestamp), ('2020-09-08T13:42:29.190Z'::timestamp), ('2020-09-08T12:42:29.190Z'::timestamp) @@ -2394,8 +2394,8 @@ statement ok drop table t1 statement ok -create table table_a (val int, ts1 timestamp, ts2 timestamp) as values - (1, '2018-07-01T06:00:00'::timestamp, '2018-07-01T07:00:00'::timestamp), +create table table_a (val int, ts1 timestamp, ts2 timestamp) as values + (1, '2018-07-01T06:00:00'::timestamp, '2018-07-01T07:00:00'::timestamp), (2, '2018-07-01T07:00:00'::timestamp, '2018-07-01T08:00:00'::timestamp) query I?