diff --git a/Cargo.lock b/Cargo.lock index ed1ee3b8..eb674d0f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -130,9 +130,9 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "219d05930b81663fd3b32e3bde8ce5bff3c4d23052a99f11a8fa50a3b47b2658" +checksum = "7ae9728f104939be6d8d9b368a354b4929b0569160ea1641f0721b55a861ce38" dependencies = [ "arrow-arith", "arrow-array", @@ -152,9 +152,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0272150200c07a86a390be651abdd320a2d12e84535f0837566ca87ecd8f95e0" +checksum = "a7029a5b3efbeafbf4a12d12dc16b8f9e9bff20a410b8c25c5d28acc089e1043" dependencies = [ "arrow-array", "arrow-buffer", @@ -167,9 +167,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8010572cf8c745e242d1b632bd97bd6d4f40fefed5ed1290a8f433abaa686fea" +checksum = "d33238427c60271710695f17742f45b1a5dc5bcfc5c15331c25ddfe7abf70d97" dependencies = [ "ahash", "arrow-buffer", @@ -184,9 +184,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d0a2432f0cba5692bf4cb757469c66791394bac9ec7ce63c1afe74744c37b27" +checksum = "fe9b95e825ae838efaf77e366c00d3fc8cca78134c9db497d6bda425f2e7b7c1" dependencies = [ "bytes", "half", @@ -195,9 +195,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9abc10cd7995e83505cc290df9384d6e5412b207b79ce6bdff89a10505ed2cba" +checksum = "87cf8385a9d5b5fcde771661dd07652b79b9139fea66193eda6a88664400ccab" dependencies = [ "arrow-array", "arrow-buffer", @@ -216,9 +216,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95cbcba196b862270bf2a5edb75927380a7f3a163622c61d40cbba416a6305f2" +checksum = "cea5068bef430a86690059665e40034625ec323ffa4dd21972048eebb0127adc" dependencies = [ "arrow-array", "arrow-buffer", @@ -235,9 +235,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2742ac1f6650696ab08c88f6dd3f0eb68ce10f8c253958a18c943a68cd04aec5" +checksum = "cb29be98f987bcf217b070512bb7afba2f65180858bca462edf4a39d84a23e10" dependencies = [ "arrow-buffer", "arrow-schema", @@ -247,9 +247,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a42ea853130f7e78b9b9d178cb4cd01dee0f78e64d96c2949dc0a915d6d9e19d" +checksum = "ffc68f6523970aa6f7ce1dc9a33a7d9284cfb9af77d4ad3e617dbe5d79cc6ec8" dependencies = [ "arrow-array", "arrow-buffer", @@ -262,9 +262,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaafb5714d4e59feae964714d724f880511500e3569cc2a94d02456b403a2a49" +checksum = "2041380f94bd6437ab648e6c2085a045e45a0c44f91a1b9a4fe3fed3d379bfb1" dependencies = [ "arrow-array", "arrow-buffer", @@ -282,9 +282,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3e6b61e3dc468f503181dccc2fc705bdcc5f2f146755fa5b56d0a6c5943f412" +checksum = "fcb56ed1547004e12203652f12fe12e824161ff9d1e5cf2a7dc4ff02ba94f413" dependencies = [ "arrow-array", "arrow-buffer", @@ -297,9 +297,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "848ee52bb92eb459b811fb471175ea3afcf620157674c8794f539838920f9228" +checksum = "575b42f1fc588f2da6977b94a5ca565459f5ab07b60545e17243fb9a7ed6d43e" dependencies = [ "ahash", "arrow-array", @@ -312,18 +312,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02d9483aaabe910c4781153ae1b6ae0393f72d9ef757d38d09d450070cf2e528" +checksum = "32aae6a60458a2389c0da89c9de0b7932427776127da1a738e2efc21d32f3393" dependencies = [ "bitflags 2.5.0", ] [[package]] name = "arrow-select" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "849524fa70e0e3c5ab58394c770cb8f514d0122d20de08475f7b472ed8075830" +checksum = "de36abaef8767b4220d7b4a8c2fe5ffc78b47db81b03d77e2136091c3ba39102" dependencies = [ "ahash", "arrow-array", @@ -335,9 +335,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9373cb5a021aee58863498c37eb484998ef13377f69989c6c5ccfbd258236cdb" +checksum = "e435ada8409bcafc910bc3e0077f532a4daa20e99060a496685c0e3e53cc2597" dependencies = [ "arrow-array", "arrow-buffer", @@ -364,8 +364,8 @@ dependencies = [ "pin-project-lite", "tokio", "xz2", - "zstd 0.13.1", - "zstd-safe 7.1.0", + "zstd 0.13.0", + "zstd-safe 7.0.0", ] [[package]] @@ -400,19 +400,16 @@ dependencies = [ ] [[package]] -name = "autocfg" -version = "1.3.0" +name = "atomic-waker" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" [[package]] -name = "autotools" -version = "0.2.7" +name = "autocfg" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef941527c41b0fc0dd48511a8154cd5fc7e29200a0ff8b7203c5d777dbc795cf" -dependencies = [ - "cc", -] +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" [[package]] name = "backtrace" @@ -486,9 +483,9 @@ dependencies = [ [[package]] name = "brotli" -version = "3.5.0" +version = "6.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d640d25bc63c50fb1f0b545ffd80207d2e10a4c965530809b40ba3386825c391" +checksum = "74f7971dbd9326d58187408ab83117d8ac1bb9c17b085fdacd1cf2f598719b6b" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -497,9 +494,9 @@ dependencies = [ [[package]] name = "brotli-decompressor" -version = "2.5.1" +version = "4.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" +checksum = "9a45bd2e4095a8b518033b128020dd4a55aab1c0a381ba4404a472630f4bc362" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -576,9 +573,9 @@ dependencies = [ [[package]] name = "chrono-tz" -version = "0.8.6" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d59ae0466b83e838b81a54256c39d5d7c20b9d7daa10510a242d9b75abd5936e" +checksum = "93698b29de5e97ad0ae26447b344c482a7284c737d9ddc5f9e52b74a336671bb" dependencies = [ "chrono", "chrono-tz-build", @@ -587,15 +584,24 @@ dependencies = [ [[package]] name = "chrono-tz-build" -version = "0.2.1" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "433e39f13c9a060046954e0592a8d0a4bcb1040125cbf91cb8ee58964cfb350f" +checksum = "0c088aee841df9c3041febbb73934cfc39708749bf96dc827e3359cd39ef11b1" dependencies = [ "parse-zoneinfo", "phf", "phf_codegen", ] +[[package]] +name = "cmake" +version = "0.1.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130" +dependencies = [ + "cc", +] + [[package]] name = "comfy-table" version = "7.1.1" @@ -734,9 +740,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05fb4eeeb7109393a0739ac5b8fd892f95ccef691421491c85544f7997366f68" +checksum = "2f92d2d7a9cba4580900b32b009848d9eb35f1028ac84cdd6ddcf97612cd0068" dependencies = [ "ahash", "apache-avro", @@ -759,6 +765,7 @@ dependencies = [ "datafusion-functions-array", "datafusion-optimizer", "datafusion-physical-expr", + "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-sql", "flate2", @@ -767,13 +774,14 @@ dependencies = [ "half", "hashbrown", "indexmap", - "itertools", + "itertools 0.12.1", "log", "num-traits", "num_cpus", "object_store", "parking_lot", "parquet", + "paste", "pin-project-lite", "rand", "sqlparser", @@ -783,14 +791,14 @@ dependencies = [ "url", "uuid", "xz2", - "zstd 0.13.1", + "zstd 0.13.0", ] [[package]] name = "datafusion-common" -version = "38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "741aeac15c82f239f2fc17deccaab19873abbd62987be20023689b15fa72fa09" +checksum = "effed030d2c1667eb1e11df5372d4981eaf5d11a521be32220b3985ae5ba6971" dependencies = [ "ahash", "apache-avro", @@ -800,6 +808,7 @@ dependencies = [ "arrow-schema", "chrono", "half", + "hashbrown", "instant", "libc", "num_cpus", @@ -811,18 +820,18 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e8ddfb8d8cb51646a30da0122ecfffb81ca16919ae9a3495a9e7468bdcd52b8" +checksum = "d0091318129dad1359f08e4c6c71f855163c35bba05d1dbf983196f727857894" dependencies = [ "tokio", ] [[package]] name = "datafusion-execution" -version = "38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282122f90b20e8f98ebfa101e4bf20e718fd2684cf81bef4e8c6366571c64404" +checksum = "8385aba84fc4a06d3ebccfbcbf9b4f985e80c762fac634b49079f7cc14933fb1" dependencies = [ "arrow", "chrono", @@ -841,13 +850,14 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5478588f733df0dfd87a62671c7478f590952c95fa2fa5c137e3ff2929491e22" +checksum = "ebb192f0055d2ce64e38ac100abc18e4e6ae9734d3c28eee522bbbd6a32108a3" dependencies = [ "ahash", "arrow", "arrow-array", + "arrow-buffer", "chrono", "datafusion-common", "paste", @@ -859,9 +869,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4afd261cea6ac9c3ca1192fd5e9f940596d8e9208c5b1333f4961405db53185" +checksum = "27c081ae5b7edd712b92767fb8ed5c0e32755682f8075707666cd70835807c0b" dependencies = [ "arrow", "base64 0.22.1", @@ -874,7 +884,7 @@ dependencies = [ "datafusion-physical-expr", "hashbrown", "hex", - "itertools", + "itertools 0.12.1", "log", "md-5", "rand", @@ -886,11 +896,13 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b36a6c4838ab94b5bf8f7a96ce6ce059d805c5d1dcaa6ace49e034eb65cd999" +checksum = "feb28a4ea52c28a26990646986a27c4052829a2a2572386258679e19263f8b78" dependencies = [ + "ahash", "arrow", + "arrow-schema", "datafusion-common", "datafusion-execution", "datafusion-expr", @@ -902,9 +914,9 @@ dependencies = [ [[package]] name = "datafusion-functions-array" -version = "38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5fdd200a6233f48d3362e7ccb784f926f759100e44ae2137a5e2dcb986a59c4" +checksum = "89b17c02a74cdc87380a56758ec27e7d417356bf806f33062700908929aedb8a" dependencies = [ "arrow", "arrow-array", @@ -915,16 +927,16 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-functions", - "itertools", + "itertools 0.12.1", "log", "paste", ] [[package]] name = "datafusion-optimizer" -version = "38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54f2820938810e8a2d71228fd6f59f33396aebc5f5f687fcbf14de5aab6a7e1a" +checksum = "12172f2a6c9eb4992a51e62d709eeba5dedaa3b5369cce37ff6c2260e100ba76" dependencies = [ "arrow", "async-trait", @@ -934,16 +946,16 @@ dependencies = [ "datafusion-physical-expr", "hashbrown", "indexmap", - "itertools", + "itertools 0.12.1", "log", "regex-syntax", ] [[package]] name = "datafusion-physical-expr" -version = "38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9adf8eb12716f52ddf01e09eb6c94d3c9b291e062c05c91b839a448bddba2ff8" +checksum = "7a3fce531b623e94180f6cd33d620ef01530405751b6ddd2fd96250cdbd78e2e" dependencies = [ "ahash", "arrow", @@ -963,7 +975,7 @@ dependencies = [ "hashbrown", "hex", "indexmap", - "itertools", + "itertools 0.12.1", "log", "paste", "petgraph", @@ -972,20 +984,21 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d5472c3230584c150197b3f2c23f2392b9dc54dbfb62ad41e7e36447cfce4be" +checksum = "046400b6a2cc3ed57a7c576f5ae6aecc77804ac8e0186926b278b189305b2a77" dependencies = [ "arrow", "datafusion-common", "datafusion-expr", + "rand", ] [[package]] name = "datafusion-physical-plan" -version = "38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18ae750c38389685a8b62e5b899bbbec488950755ad6d218f3662d35b800c4fe" +checksum = "4aed47f5a2ad8766260befb375b201592e86a08b260256e168ae4311426a2bff" dependencies = [ "ahash", "arrow", @@ -1006,7 +1019,7 @@ dependencies = [ "half", "hashbrown", "indexmap", - "itertools", + "itertools 0.12.1", "log", "once_cell", "parking_lot", @@ -1017,7 +1030,7 @@ dependencies = [ [[package]] name = "datafusion-python" -version = "38.0.1" +version = "39.0.0" dependencies = [ "async-trait", "datafusion", @@ -1045,9 +1058,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "befc67a3cdfbfa76853f43b10ac27337821bb98e519ab6baf431fcc0bcfcafdb" +checksum = "7fa92bb1fd15e46ce5fb6f1c85f3ac054592560f294429a28e392b5f9cd4255e" dependencies = [ "arrow", "arrow-array", @@ -1055,23 +1068,25 @@ dependencies = [ "datafusion-common", "datafusion-expr", "log", + "regex", "sqlparser", "strum 0.26.2", ] [[package]] name = "datafusion-substrait" -version = "38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f62542caa77df003e23a8bc2f1b8a1ffc682fe447c7fcb4905d109e3d7a5b9d" +checksum = "8249d69665c1cd32e07789ed6dd1da6528a23019ef16d3483db52952b6f9f68a" dependencies = [ + "arrow-buffer", "async-recursion", "chrono", "datafusion", - "itertools", + "itertools 0.12.1", "object_store", + "pbjson-types", "prost", - "prost-types", "substrait", ] @@ -1104,15 +1119,6 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" -[[package]] -name = "encoding_rs" -version = "0.8.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" -dependencies = [ - "cfg-if", -] - [[package]] name = "equivalent" version = "1.0.1" @@ -1143,9 +1149,9 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] name = "flatbuffers" -version = "23.5.26" +version = "24.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640" +checksum = "8add37afff2d4ffa83bc748a70b4b1370984f6980768554182424ef71447c35f" dependencies = [ "bitflags 1.3.2", "rustc_version", @@ -1300,15 +1306,15 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.3.26" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" +checksum = "fa82e28a107a8cc405f0839610bdc9b15f1e25ec7d696aa5cf173edbcb1486ab" dependencies = [ + "atomic-waker", "bytes", "fnv", "futures-core", "futures-sink", - "futures-util", "http", "indexmap", "slab", @@ -1364,9 +1370,9 @@ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] name = "http" -version = "0.2.12" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" dependencies = [ "bytes", "fnv", @@ -1375,12 +1381,24 @@ dependencies = [ [[package]] name = "http-body" -version = "0.4.6" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643" +dependencies = [ + "bytes", + "http", +] + +[[package]] +name = "http-body-util" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +checksum = "0475f8b2ac86659c21b64320d5d653f9efe42acd2a4e560073ec61a155a34f1d" dependencies = [ "bytes", + "futures-core", "http", + "http-body", "pin-project-lite", ] @@ -1390,12 +1408,6 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" -[[package]] -name = "httpdate" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" - [[package]] name = "humantime" version = "2.1.0" @@ -1404,40 +1416,59 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "0.14.29" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f361cde2f109281a220d4307746cdfd5ee3f410da58a70377762396775634b33" +checksum = "fe575dd17d0862a9a33781c8c4696a55c320909004a67a00fb286ba8b1bc496d" dependencies = [ "bytes", "futures-channel", - "futures-core", "futures-util", "h2", "http", "http-body", "httparse", - "httpdate", "itoa", "pin-project-lite", - "socket2", + "smallvec", "tokio", - "tower-service", - "tracing", "want", ] [[package]] name = "hyper-rustls" -version = "0.24.2" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +checksum = "a0bea761b46ae2b24eb4aef630d8d1c398157b6fc29e6350ecf090a0b70c952c" dependencies = [ "futures-util", "http", "hyper", + "hyper-util", "rustls", + "rustls-pki-types", "tokio", "tokio-rustls", + "tower-service", +] + +[[package]] +name = "hyper-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b875924a60b96e5d7b9ae7b066540b1dd1cbd90d1828f54c92e02a283351c56" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http", + "http-body", + "hyper", + "pin-project-lite", + "socket2", + "tokio", + "tower", + "tower-service", + "tracing", ] [[package]] @@ -1513,6 +1544,15 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.12.1" @@ -1865,18 +1905,18 @@ dependencies = [ [[package]] name = "object_store" -version = "0.9.1" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8718f8b65fdf67a45108d1548347d4af7d71fb81ce727bbf9e3b2535e079db3" +checksum = "fbebfd32c213ba1907fa7a9c9138015a8de2b43e30c5aa45b18f7deb46786ad6" dependencies = [ "async-trait", - "base64 0.21.7", + "base64 0.22.1", "bytes", "chrono", "futures", "humantime", "hyper", - "itertools", + "itertools 0.12.1", "md-5", "parking_lot", "percent-encoding", @@ -1884,7 +1924,7 @@ dependencies = [ "rand", "reqwest", "ring", - "rustls-pemfile 2.1.2", + "rustls-pemfile", "serde", "serde_json", "snafu", @@ -1940,9 +1980,9 @@ dependencies = [ [[package]] name = "parquet" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "096795d4f47f65fd3ee1ec5a98b77ab26d602f2cc785b0e4be5443add17ecc32" +checksum = "29c3b5322cc1bbf67f11c079c42be41a55949099b78732f7dba9e15edde40eab" dependencies = [ "ahash", "arrow-array", @@ -1970,7 +2010,8 @@ dependencies = [ "thrift", "tokio", "twox-hash", - "zstd 0.13.1", + "zstd 0.13.0", + "zstd-sys", ] [[package]] @@ -1988,6 +2029,43 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "pbjson" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1030c719b0ec2a2d25a5df729d6cff1acf3cc230bf766f4f97833591f7577b90" +dependencies = [ + "base64 0.21.7", + "serde", +] + +[[package]] +name = "pbjson-build" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2580e33f2292d34be285c5bc3dba5259542b083cfad6037b6d70345f24dcb735" +dependencies = [ + "heck 0.4.1", + "itertools 0.11.0", + "prost", + "prost-types", +] + +[[package]] +name = "pbjson-types" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18f596653ba4ac51bdecbb4ef6773bc7f56042dc13927910de1684ad3d32aa12" +dependencies = [ + "bytes", + "chrono", + "pbjson", + "pbjson-build", + "prost", + "prost-build", + "serde", +] + [[package]] name = "percent-encoding" version = "2.3.1" @@ -2042,6 +2120,26 @@ dependencies = [ "siphasher", ] +[[package]] +name = "pin-project" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6bf43b791c5b9e34c3d182969b4abb522f9343702850a2e57f460d00d09b4b3" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "pin-project-lite" version = "0.2.14" @@ -2109,14 +2207,16 @@ checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4" dependencies = [ "bytes", "heck 0.5.0", - "itertools", + "itertools 0.12.1", "log", "multimap", "once_cell", "petgraph", + "prettyplease", "prost", "prost-types", "regex", + "syn 2.0.66", "tempfile", ] @@ -2127,7 +2227,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1" dependencies = [ "anyhow", - "itertools", + "itertools 0.12.1", "proc-macro2", "quote", "syn 2.0.66", @@ -2144,18 +2244,18 @@ dependencies = [ [[package]] name = "protobuf-src" -version = "1.1.0+21.5" +version = "2.0.1+26.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7ac8852baeb3cc6fb83b93646fb93c0ffe5d14bf138c945ceb4b9948ee0e3c1" +checksum = "f8ba1cfa4b9dc098926b8cce388bf434b93516db3ecf6e8b1a37eb643d733ee7" dependencies = [ - "autotools", + "cmake", ] [[package]] name = "pyo3" -version = "0.20.3" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53bdbb96d49157e65d45cc287af5f32ffadd5f4761438b527b055fb0d4bb8233" +checksum = "a5e00b96a521718e08e03b1a622f01c8a8deb50719335de3f60b3b3950f069d8" dependencies = [ "cfg-if", "indoc", @@ -2171,9 +2271,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.20.3" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "deaa5745de3f5231ce10517a1f5dd97d53e5a2fd77aa6b5842292085831d48d7" +checksum = "7883df5835fafdad87c0d888b266c8ec0f4c9ca48a5bed6bbb592e8dedee1b50" dependencies = [ "once_cell", "target-lexicon", @@ -2181,9 +2281,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.20.3" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b42531d03e08d4ef1f6e85a2ed422eb678b8cd62b762e53891c05faf0d4afa" +checksum = "01be5843dc60b916ab4dad1dca6d20b9b4e6ddc8e15f50c47fe6d85f1fb97403" dependencies = [ "libc", "pyo3-build-config", @@ -2191,9 +2291,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.20.3" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7305c720fa01b8055ec95e484a6eca7a83c841267f0dd5280f0c8b8551d2c158" +checksum = "77b34069fc0682e11b31dbd10321cbf94808394c56fd996796ce45217dfac53c" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -2203,9 +2303,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.20.3" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c7e9b68bb9c3149c5b0cade5d07f953d6d125eb4337723c4ccdb665f1f96185" +checksum = "08260721f32db5e1a5beae69a55553f56b99bd0e1c3e6e0a5e8851a9d0f5a85c" dependencies = [ "heck 0.4.1", "proc-macro2", @@ -2315,9 +2415,9 @@ checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" [[package]] name = "regress" -version = "0.8.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f5f39ba4513916c1b2657b72af6ec671f091cd637992f58d0ede5cae4e5dea0" +checksum = "0eae2a1ebfecc58aff952ef8ccd364329abe627762f5bf09ff42eb9d98522479" dependencies = [ "hashbrown", "memchr", @@ -2325,20 +2425,21 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.11.27" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62" +checksum = "566cafdd92868e0939d3fb961bd0dc25fcfaaed179291093b3d43e6b3150ea10" dependencies = [ - "base64 0.21.7", + "base64 0.22.1", "bytes", - "encoding_rs", "futures-core", "futures-util", "h2", "http", "http-body", + "http-body-util", "hyper", "hyper-rustls", + "hyper-util", "ipnet", "js-sys", "log", @@ -2348,12 +2449,12 @@ dependencies = [ "pin-project-lite", "rustls", "rustls-native-certs", - "rustls-pemfile 1.0.4", + "rustls-pemfile", + "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", "sync_wrapper", - "system-configuration", "tokio", "tokio-rustls", "tokio-util", @@ -2417,37 +2518,31 @@ dependencies = [ [[package]] name = "rustls" -version = "0.21.12" +version = "0.22.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" +checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432" dependencies = [ "log", "ring", + "rustls-pki-types", "rustls-webpki", - "sct", + "subtle", + "zeroize", ] [[package]] name = "rustls-native-certs" -version = "0.6.3" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" +checksum = "8f1fb85efa936c42c6d5fc28d2629bb51e4b2f4b8a5211e297d599cc5a093792" dependencies = [ "openssl-probe", - "rustls-pemfile 1.0.4", + "rustls-pemfile", + "rustls-pki-types", "schannel", "security-framework", ] -[[package]] -name = "rustls-pemfile" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" -dependencies = [ - "base64 0.21.7", -] - [[package]] name = "rustls-pemfile" version = "2.1.2" @@ -2466,11 +2561,12 @@ checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" [[package]] name = "rustls-webpki" -version = "0.101.7" +version = "0.102.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +checksum = "ff448f7e92e913c4b7d4c6d8e4540a1724b319b4152b8aef6d4cf8339712b33e" dependencies = [ "ring", + "rustls-pki-types", "untrusted", ] @@ -2534,16 +2630,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" -[[package]] -name = "sct" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" -dependencies = [ - "ring", - "untrusted", -] - [[package]] name = "security-framework" version = "2.11.0" @@ -2572,6 +2658,9 @@ name = "semver" version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" +dependencies = [ + "serde", +] [[package]] name = "seq-macro" @@ -2736,9 +2825,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.45.0" +version = "0.47.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7bbffee862a796d67959a89859d6b1046bb5016d63e23835ad0da182777bbe0" +checksum = "295e9930cd7a97e58ca2a070541a3ca502b17f5d1fa7157376d0fabd85324f25" dependencies = [ "log", "sqlparser_derive", @@ -2804,11 +2893,14 @@ dependencies = [ [[package]] name = "substrait" -version = "0.32.0" +version = "0.34.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f01344023c2614171a9ffd6e387eea14e12f7387c5b6adb33f1563187d65e376" +checksum = "04c77dec9b6c4e48ac828937bbe7cf473b0933168c5d76d51a5816ace7046be9" dependencies = [ "heck 0.5.0", + "pbjson", + "pbjson-build", + "pbjson-types", "prettyplease", "prost", "prost-build", @@ -2858,27 +2950,6 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" -[[package]] -name = "system-configuration" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" -dependencies = [ - "bitflags 1.3.2", - "core-foundation", - "system-configuration-sys", -] - -[[package]] -name = "system-configuration-sys" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" -dependencies = [ - "core-foundation-sys", - "libc", -] - [[package]] name = "target-lexicon" version = "0.12.14" @@ -2982,11 +3053,12 @@ dependencies = [ [[package]] name = "tokio-rustls" -version = "0.24.1" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +checksum = "775e0c0f0adb3a2f22a00c4745d728b479985fc15ee7ca6a2608388c5569860f" dependencies = [ "rustls", + "rustls-pki-types", "tokio", ] @@ -3003,6 +3075,27 @@ dependencies = [ "tokio", ] +[[package]] +name = "tower" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" +dependencies = [ + "futures-core", + "futures-util", + "pin-project", + "pin-project-lite", + "tokio", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-layer" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0" + [[package]] name = "tower-service" version = "0.3.2" @@ -3084,9 +3177,9 @@ checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "typify" -version = "0.0.16" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c61e9db210bbff218e6535c664b37ec47da449169b98e7866d0580d0db75529" +checksum = "adb6beec125971dda80a086f90b4a70f60f222990ce4d63ad0fc140492f53444" dependencies = [ "typify-impl", "typify-macro", @@ -3094,16 +3187,18 @@ dependencies = [ [[package]] name = "typify-impl" -version = "0.0.16" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95e32f38493804f88e2dc7a5412eccd872ea5452b4db9b0a77de4df180f2a87e" +checksum = "93bbb24e990654aff858d80fee8114f4322f7d7a1b1ecb45129e2fcb0d0ad5ae" dependencies = [ - "heck 0.4.1", + "heck 0.5.0", "log", "proc-macro2", "quote", "regress", "schemars", + "semver", + "serde", "serde_json", "syn 2.0.66", "thiserror", @@ -3112,13 +3207,14 @@ dependencies = [ [[package]] name = "typify-macro" -version = "0.0.16" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc09508b72f63d521d68e42c7f172c7416d67986df44b3c7d1f7f9963948ed32" +checksum = "f8e6491896e955692d68361c68db2b263e3bec317ec0b684e0e2fa882fb6e31e" dependencies = [ "proc-macro2", "quote", "schemars", + "semver", "serde", "serde_json", "serde_tokenstream", @@ -3477,9 +3573,9 @@ checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" [[package]] name = "winreg" -version = "0.50.0" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" +checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" dependencies = [ "cfg-if", "windows-sys 0.48.0", @@ -3514,6 +3610,12 @@ dependencies = [ "syn 2.0.66", ] +[[package]] +name = "zeroize" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" + [[package]] name = "zstd" version = "0.12.4" @@ -3525,11 +3627,11 @@ dependencies = [ [[package]] name = "zstd" -version = "0.13.1" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d789b1514203a1120ad2429eae43a7bd32b90976a7bb8a05f7ec02fa88cc23a" +checksum = "bffb3309596d527cfcba7dfc6ed6052f1d39dfbd7c867aa2e865e4a449c10110" dependencies = [ - "zstd-safe 7.1.0", + "zstd-safe 7.0.0", ] [[package]] @@ -3544,18 +3646,18 @@ dependencies = [ [[package]] name = "zstd-safe" -version = "7.1.0" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd99b45c6bc03a018c8b8a86025678c87e55526064e38f9df301989dce7ec0a" +checksum = "43747c7422e2924c11144d5229878b98180ef8b06cca4ab5af37afc8a8d8ea3e" dependencies = [ "zstd-sys", ] [[package]] name = "zstd-sys" -version = "2.0.10+zstd.1.5.6" +version = "2.0.9+zstd.1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c253a4914af5bafc8fa8c86ee400827e83cf6ec01195ec1f1ed8441bf00d65aa" +checksum = "9e16efa8a874a0481a574084d34cc26fdb3b99627480f785888deb6386506656" dependencies = [ "cc", "pkg-config", diff --git a/Cargo.toml b/Cargo.toml index fd6f4bb3..85a19d1e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-python" -version = "38.0.1" +version = "39.0.0" homepage = "https://datafusion.apache.org/python" repository = "https://github.com/apache/datafusion-python" authors = ["Apache DataFusion "] @@ -36,28 +36,28 @@ substrait = ["dep:datafusion-substrait"] [dependencies] tokio = { version = "1.35", features = ["macros", "rt", "rt-multi-thread", "sync"] } rand = "0.8" -pyo3 = { version = "0.20", features = ["extension-module", "abi3", "abi3-py38"] } -datafusion = { version = "38.0.0", features = ["pyarrow", "avro", "unicode_expressions"] } -datafusion-common = { version = "38.0.0", features = ["pyarrow"] } -datafusion-expr = "38.0.0" -datafusion-functions-array = "38.0.0" -datafusion-optimizer = "38.0.0" -datafusion-sql = "38.0.0" -datafusion-substrait = { version = "38.0.0", optional = true } +pyo3 = { version = "0.21", features = ["extension-module", "abi3", "abi3-py38", "gil-refs"] } +datafusion = { version = "39.0.0", features = ["pyarrow", "avro", "unicode_expressions"] } +datafusion-common = { version = "39.0.0", features = ["pyarrow"] } +datafusion-expr = "39.0.0" +datafusion-functions-array = "39.0.0" +datafusion-optimizer = "39.0.0" +datafusion-sql = "39.0.0" +datafusion-substrait = { version = "39.0.0", optional = true } prost = "0.12" prost-types = "0.12" uuid = { version = "1.8", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false, features = ["local_dynamic_tls"] } async-trait = "0.1" futures = "0.3" -object_store = { version = "0.9.1", features = ["aws", "gcp", "azure"] } +object_store = { version = "0.10.1", features = ["aws", "gcp", "azure"] } parking_lot = "0.12" regex-syntax = "0.8.1" syn = "2.0.43" url = "2.2" [build-dependencies] -pyo3-build-config = "0.20.0" +pyo3-build-config = "0.21" [lib] name = "datafusion_python" diff --git a/docs/source/user-guide/common-operations/functions.rst b/docs/source/user-guide/common-operations/functions.rst index 50b49309..d793314f 100644 --- a/docs/source/user-guide/common-operations/functions.rst +++ b/docs/source/user-guide/common-operations/functions.rst @@ -92,12 +92,13 @@ DataFusion offers a range of helpful options. f.left(col('"Name"'), literal(4)).alias("code") ) -This also includes the functions for regular expressions like :func:`.regexp_match` +This also includes the functions for regular expressions like :func:`.regexp_replace` and :func:`.regexp_match` .. ipython:: python df.select( f.regexp_match(col('"Name"'), literal("Char")).alias("dragons"), + f.regexp_replace(col('"Name"'), literal("saur"), literal("fleur")).alias("flowers") ) diff --git a/examples/tpch/_tests.py b/examples/tpch/_tests.py index 049b43df..aa9491bf 100644 --- a/examples/tpch/_tests.py +++ b/examples/tpch/_tests.py @@ -72,7 +72,10 @@ def check_q17(df): ("q08_market_share", "q8"), ("q09_product_type_profit_measure", "q9"), ("q10_returned_item_reporting", "q10"), - ("q11_important_stock_identification", "q11"), + pytest.param( + "q11_important_stock_identification", "q11", + marks=pytest.mark.xfail # https://github.com/apache/datafusion-python/issues/730 + ), ("q12_ship_mode_order_priority", "q12"), ("q13_customer_distribution", "q13"), ("q14_promotion_effect", "q14"), diff --git a/examples/tpch/q01_pricing_summary_report.py b/examples/tpch/q01_pricing_summary_report.py index 7e86055d..cb9485a7 100644 --- a/examples/tpch/q01_pricing_summary_report.py +++ b/examples/tpch/q01_pricing_summary_report.py @@ -48,9 +48,7 @@ # want to report results for. It should be between 60-120 days before the end. DAYS_BEFORE_FINAL = 90 -# Note: this is a hack on setting the values. It should be set differently once -# https://github.com/apache/datafusion-python/issues/665 is resolved. -interval = pa.scalar((0, 0, DAYS_BEFORE_FINAL), type=pa.month_day_nano_interval()) +interval = pa.scalar((0, DAYS_BEFORE_FINAL, 0), type=pa.month_day_nano_interval()) print("Final date in database:", greatest_ship_date) diff --git a/examples/tpch/q04_order_priority_checking.py b/examples/tpch/q04_order_priority_checking.py index 40eab697..9dbd8167 100644 --- a/examples/tpch/q04_order_priority_checking.py +++ b/examples/tpch/q04_order_priority_checking.py @@ -49,9 +49,7 @@ # Create a date object from the string date = datetime.strptime(DATE_OF_INTEREST, "%Y-%m-%d").date() -# Note: this is a hack on setting the values. It should be set differently once -# https://github.com/apache/datafusion-python/issues/665 is resolved. -interval = pa.scalar((0, 0, INTERVAL_DAYS), type=pa.month_day_nano_interval()) +interval = pa.scalar((0, INTERVAL_DAYS, 0), type=pa.month_day_nano_interval()) # Limit results to cases where commitment date before receipt date # Aggregate the results so we only get one row to join with the order table. diff --git a/examples/tpch/q05_local_supplier_volume.py b/examples/tpch/q05_local_supplier_volume.py index 27b4b84c..f17f600a 100644 --- a/examples/tpch/q05_local_supplier_volume.py +++ b/examples/tpch/q05_local_supplier_volume.py @@ -41,9 +41,7 @@ date = datetime.strptime(DATE_OF_INTEREST, "%Y-%m-%d").date() -# Note: this is a hack on setting the values. It should be set differently once -# https://github.com/apache/datafusion-python/issues/665 is resolved. -interval = pa.scalar((0, 0, INTERVAL_DAYS), type=pa.month_day_nano_interval()) +interval = pa.scalar((0, INTERVAL_DAYS, 0), type=pa.month_day_nano_interval()) # Load the dataframes we need diff --git a/examples/tpch/q06_forecasting_revenue_change.py b/examples/tpch/q06_forecasting_revenue_change.py index 3f58c5ec..ec98aaf5 100644 --- a/examples/tpch/q06_forecasting_revenue_change.py +++ b/examples/tpch/q06_forecasting_revenue_change.py @@ -45,9 +45,7 @@ date = datetime.strptime(DATE_OF_INTEREST, "%Y-%m-%d").date() -# Note: this is a hack on setting the values. It should be set differently once -# https://github.com/apache/datafusion-python/issues/665 is resolved. -interval = pa.scalar((0, 0, INTERVAL_DAYS), type=pa.month_day_nano_interval()) +interval = pa.scalar((0, INTERVAL_DAYS, 0), type=pa.month_day_nano_interval()) # Load the dataframes we need diff --git a/examples/tpch/q10_returned_item_reporting.py b/examples/tpch/q10_returned_item_reporting.py index ed88c299..78327c3a 100644 --- a/examples/tpch/q10_returned_item_reporting.py +++ b/examples/tpch/q10_returned_item_reporting.py @@ -38,9 +38,7 @@ date_start_of_quarter = lit(datetime.strptime(DATE_START_OF_QUARTER, "%Y-%m-%d").date()) -# Note: this is a hack on setting the values. It should be set differently once -# https://github.com/apache/datafusion-python/issues/665 is resolved. -interval_one_quarter = lit(pa.scalar((0, 0, 92), type=pa.month_day_nano_interval())) +interval_one_quarter = lit(pa.scalar((0, 92, 0), type=pa.month_day_nano_interval())) # Load the dataframes we need diff --git a/examples/tpch/q12_ship_mode_order_priority.py b/examples/tpch/q12_ship_mode_order_priority.py index d3dd7d28..150870c6 100644 --- a/examples/tpch/q12_ship_mode_order_priority.py +++ b/examples/tpch/q12_ship_mode_order_priority.py @@ -51,9 +51,7 @@ date = datetime.strptime(DATE_OF_INTEREST, "%Y-%m-%d").date() -# Note: this is a hack on setting the values. It should be set differently once -# https://github.com/apache/datafusion-python/issues/665 is resolved. -interval = pa.scalar((0, 0, 365), type=pa.month_day_nano_interval()) +interval = pa.scalar((0, 365, 0), type=pa.month_day_nano_interval()) df = df_lineitem.filter(col("l_receiptdate") >= lit(date)).filter( diff --git a/examples/tpch/q14_promotion_effect.py b/examples/tpch/q14_promotion_effect.py index 333398c1..75fa363a 100644 --- a/examples/tpch/q14_promotion_effect.py +++ b/examples/tpch/q14_promotion_effect.py @@ -34,9 +34,8 @@ DATE = "1995-09-01" date_of_interest = lit(datetime.strptime(DATE, "%Y-%m-%d").date()) -# Note: this is a hack on setting the values. It should be set differently once -# https://github.com/apache/datafusion-python/issues/665 is resolved. -interval_one_month = lit(pa.scalar((0, 0, 30), type=pa.month_day_nano_interval())) + +interval_one_month = lit(pa.scalar((0, 30, 0), type=pa.month_day_nano_interval())) # Load the dataframes we need diff --git a/examples/tpch/q15_top_supplier.py b/examples/tpch/q15_top_supplier.py index 91af34a9..4b9e4c1d 100644 --- a/examples/tpch/q15_top_supplier.py +++ b/examples/tpch/q15_top_supplier.py @@ -34,9 +34,8 @@ DATE = "1996-01-01" date_of_interest = lit(datetime.strptime(DATE, "%Y-%m-%d").date()) -# Note: this is a hack on setting the values. It should be set differently once -# https://github.com/apache/datafusion-python/issues/665 is resolved. -interval_3_months = lit(pa.scalar((0, 0, 91), type=pa.month_day_nano_interval())) + +interval_3_months = lit(pa.scalar((0, 91, 0), type=pa.month_day_nano_interval())) # Load the dataframes we need diff --git a/examples/tpch/q20_potential_part_promotion.py b/examples/tpch/q20_potential_part_promotion.py index 4a602846..85e7226f 100644 --- a/examples/tpch/q20_potential_part_promotion.py +++ b/examples/tpch/q20_potential_part_promotion.py @@ -56,9 +56,7 @@ date = datetime.strptime(DATE_OF_INTEREST, "%Y-%m-%d").date() -# Note: this is a hack on setting the values. It should be set differently once -# https://github.com/apache/datafusion-python/issues/665 is resolved. -interval = pa.scalar((0, 0, 365), type=pa.month_day_nano_interval()) +interval = pa.scalar((0, 365, 0), type=pa.month_day_nano_interval()) # Filter down dataframes df_nation = df_nation.filter(col("n_name") == lit(NATION_OF_INTEREST)) diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index d0b823bb..846b1a45 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -53,7 +53,6 @@ ScalarVariable, Sort, TableScan, - GetIndexedField, Not, IsNotNull, IsTrue, @@ -116,7 +115,6 @@ "SimilarTo", "ScalarVariable", "Alias", - "GetIndexedField", "Not", "IsNotNull", "IsTrue", diff --git a/python/datafusion/tests/test_functions.py b/python/datafusion/tests/test_functions.py index 493b6b6b..449f706c 100644 --- a/python/datafusion/tests/test_functions.py +++ b/python/datafusion/tests/test_functions.py @@ -461,7 +461,6 @@ def py_flatten(arr): pytest.param( lambda col: f.list_slice(col, literal(-1), literal(2)), lambda data: [arr[-1:2] for arr in data], - marks=pytest.mark.xfail, ), [ lambda col: f.array_intersect(col, literal([3.0, 4.0])), diff --git a/python/datafusion/tests/test_imports.py b/python/datafusion/tests/test_imports.py index 2a8a3de8..bd4e7c31 100644 --- a/python/datafusion/tests/test_imports.py +++ b/python/datafusion/tests/test_imports.py @@ -53,7 +53,6 @@ SimilarTo, ScalarVariable, Alias, - GetIndexedField, Not, IsNotNull, IsTrue, @@ -126,7 +125,6 @@ def test_class_module_is_datafusion(): SimilarTo, ScalarVariable, Alias, - GetIndexedField, Not, IsNotNull, IsTrue, diff --git a/src/common/data_type.rs b/src/common/data_type.rs index 49959c45..cd4f864b 100644 --- a/src/common/data_type.rs +++ b/src/common/data_type.rs @@ -251,6 +251,7 @@ impl DataTypeMap { pub fn map_from_scalar_to_arrow(scalar_val: &ScalarValue) -> Result { match scalar_val { ScalarValue::Boolean(_) => Ok(DataType::Boolean), + ScalarValue::Float16(_) => Ok(DataType::Float16), ScalarValue::Float32(_) => Ok(DataType::Float32), ScalarValue::Float64(_) => Ok(DataType::Float64), ScalarValue::Decimal128(_, precision, scale) => { diff --git a/src/dataset_exec.rs b/src/dataset_exec.rs index 4c9e2f3f..8ef3a563 100644 --- a/src/dataset_exec.rs +++ b/src/dataset_exec.rs @@ -164,7 +164,7 @@ impl ExecutionPlan for DatasetExec { self.schema.clone() } - fn children(&self) -> Vec> { + fn children(&self) -> Vec<&Arc> { // this is a leaf node and has no children vec![] } diff --git a/src/expr.rs b/src/expr.rs index 2f147745..9fd29a59 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -23,12 +23,12 @@ use std::sync::Arc; use datafusion::arrow::datatypes::{DataType, Field}; use datafusion::arrow::pyarrow::PyArrowType; +use datafusion::functions::core::expr_ext::FieldAccessor; use datafusion::scalar::ScalarValue; use datafusion_expr::{ col, expr::{AggregateFunction, InList, InSubquery, ScalarFunction, Sort, WindowFunction}, - lit, Between, BinaryExpr, Case, Cast, Expr, GetFieldAccess, GetIndexedField, Like, Operator, - TryCast, + lit, Between, BinaryExpr, Case, Cast, Expr, Like, Operator, TryCast, }; use crate::common::data_type::{DataTypeMap, RexType}; @@ -71,7 +71,6 @@ pub mod filter; pub mod grouping_set; pub mod in_list; pub mod in_subquery; -pub mod indexed_field; pub mod join; pub mod like; pub mod limit; @@ -216,13 +215,7 @@ impl PyExpr { } fn __getitem__(&self, key: &str) -> PyResult { - Ok(Expr::GetIndexedField(GetIndexedField::new( - Box::new(self.expr.clone()), - GetFieldAccess::NamedStructField { - name: ScalarValue::Utf8(Some(key.to_string())), - }, - )) - .into()) + Ok(self.expr.clone().field(key).into()) } #[staticmethod] @@ -263,7 +256,7 @@ impl PyExpr { pub fn rex_type(&self) -> PyResult { Ok(match self.expr { Expr::Alias(..) => RexType::Alias, - Expr::Column(..) | Expr::GetIndexedField { .. } => RexType::Reference, + Expr::Column(..) => RexType::Reference, Expr::ScalarVariable(..) | Expr::Literal(..) => RexType::Literal, Expr::BinaryExpr { .. } | Expr::Not(..) @@ -314,6 +307,11 @@ impl PyExpr { ), )), ScalarValue::Boolean(v) => Ok(v.into_py(py)), + ScalarValue::Float16(_) => Err(py_datafusion_err( + datafusion_common::DataFusionError::NotImplemented( + "ScalarValue::Float16".to_string(), + ), + )), ScalarValue::Float32(v) => Ok(v.into_py(py)), ScalarValue::Float64(v) => Ok(v.into_py(py)), ScalarValue::Decimal128(v, _, _) => Ok(v.into_py(py)), @@ -355,8 +353,10 @@ impl PyExpr { ScalarValue::TimestampMicrosecond(v, _) => Ok(v.into_py(py)), ScalarValue::TimestampNanosecond(v, _) => Ok(v.into_py(py)), ScalarValue::IntervalYearMonth(v) => Ok(v.into_py(py)), - ScalarValue::IntervalDayTime(v) => Ok(v.into_py(py)), - ScalarValue::IntervalMonthDayNano(v) => Ok(v.into_py(py)), + ScalarValue::IntervalDayTime(v) => Ok(ScalarValue::IntervalDayTime(*v).into_py(py)), + ScalarValue::IntervalMonthDayNano(v) => { + Ok(ScalarValue::IntervalMonthDayNano(*v).into_py(py)) + } ScalarValue::DurationSecond(v) => Ok(v.into_py(py)), ScalarValue::DurationMicrosecond(v) => Ok(v.into_py(py)), ScalarValue::DurationNanosecond(v) => Ok(v.into_py(py)), @@ -417,7 +417,6 @@ impl PyExpr { | Expr::IsNotFalse(expr) | Expr::IsNotUnknown(expr) | Expr::Negative(expr) - | Expr::GetIndexedField(GetIndexedField { expr, .. }) | Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) | Expr::Sort(Sort { expr, .. }) @@ -513,9 +512,7 @@ impl PyExpr { op, right: _, }) => format!("{op}"), - Expr::ScalarFunction(ScalarFunction { func_def, args: _ }) => { - func_def.name().to_string() - } + Expr::ScalarFunction(ScalarFunction { func, args: _ }) => func.name().to_string(), Expr::Cast { .. } => "cast".to_string(), Expr::Between { .. } => "between".to_string(), Expr::Case { .. } => "case".to_string(), @@ -674,7 +671,6 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; - m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/src/expr/literal.rs b/src/expr/literal.rs index bce987f7..0333432f 100644 --- a/src/expr/literal.rs +++ b/src/expr/literal.rs @@ -137,12 +137,7 @@ impl PyLiteral { pub fn value_interval_day_time(&self) -> PyResult> { match &self.value { - ScalarValue::IntervalDayTime(Some(iv)) => { - let interval = *iv as u64; - let days = (interval >> 32) as i32; - let ms = interval as i32; - Ok(Some((days, ms))) - } + ScalarValue::IntervalDayTime(Some(iv)) => Ok(Some((iv.days, iv.milliseconds))), ScalarValue::IntervalDayTime(None) => Ok(None), other => Err(unexpected_literal_value(other)), } diff --git a/src/expr/signature.rs b/src/expr/signature.rs index 2f194982..7882cebe 100644 --- a/src/expr/signature.rs +++ b/src/expr/signature.rs @@ -20,7 +20,6 @@ use pyo3::prelude::*; #[allow(dead_code)] #[pyclass(name = "Signature", module = "datafusion.expr", subclass)] -#[allow(dead_code)] #[derive(Clone)] pub struct PySignature { type_signature: TypeSignature, diff --git a/src/functions.rs b/src/functions.rs index a4bd9860..09cdee61 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -36,32 +36,84 @@ use datafusion_expr::{ }; #[pyfunction] -#[pyo3(signature = (y, x, distinct = false, filter = None, order_by = None))] -pub fn covar_samp( - y: PyExpr, - x: PyExpr, +pub fn sum(args: PyExpr) -> PyExpr { + functions_aggregate::expr_fn::sum(args.expr).into() +} + +#[pyfunction] +pub fn covar_samp(y: PyExpr, x: PyExpr) -> PyExpr { + functions_aggregate::expr_fn::covar_samp(y.expr, x.expr).into() +} + +#[pyfunction] +pub fn covar_pop(y: PyExpr, x: PyExpr) -> PyExpr { + functions_aggregate::expr_fn::covar_pop(y.expr, x.expr).into() +} + +#[pyfunction] +pub fn median(arg: PyExpr) -> PyExpr { + functions_aggregate::expr_fn::median(arg.expr).into() +} + +#[pyfunction] +pub fn covar(y: PyExpr, x: PyExpr) -> PyExpr { + // alias for covar_samp + covar_samp(y, x) +} + +#[pyfunction] +pub fn var_samp(expression: PyExpr) -> PyExpr { + functions_aggregate::expr_fn::var_sample(expression.expr).into() +} + +#[pyfunction] +/// Alias for [`var_samp`] +pub fn var(y: PyExpr) -> PyExpr { + var_samp(y) +} + +#[pyfunction] +#[pyo3(signature = (*args, distinct = false, filter = None, order_by = None))] +pub fn first_value( + args: Vec, distinct: bool, filter: Option, order_by: Option>, - // null_treatment: Option, ) -> PyExpr { - let filter = filter.map(|x| Box::new(x.expr)); + // TODO: allow user to select null_treatment + let null_treatment = None; + let args = args.into_iter().map(|x| x.expr).collect::>(); let order_by = order_by.map(|x| x.into_iter().map(|x| x.expr).collect::>()); - functions_aggregate::expr_fn::covar_samp(y.expr, x.expr, distinct, filter, order_by, None) - .into() + functions_aggregate::expr_fn::first_value( + args, + distinct, + filter.map(|x| Box::new(x.expr)), + order_by, + null_treatment, + ) + .into() } #[pyfunction] -#[pyo3(signature = (y, x, distinct = false, filter = None, order_by = None))] -pub fn covar( - y: PyExpr, - x: PyExpr, +#[pyo3(signature = (*args, distinct = false, filter = None, order_by = None))] +pub fn last_value( + args: Vec, distinct: bool, filter: Option, order_by: Option>, ) -> PyExpr { - // alias for covar_samp - covar_samp(y, x, distinct, filter, order_by) + // TODO: allow user to select null_treatment + let null_treatment = None; + let args = args.into_iter().map(|x| x.expr).collect::>(); + let order_by = order_by.map(|x| x.into_iter().map(|x| x.expr).collect::>()); + functions_aggregate::expr_fn::last_value( + args, + distinct, + filter.map(|x| Box::new(x.expr)), + order_by, + null_treatment, + ) + .into() } #[pyfunction] @@ -131,17 +183,20 @@ fn list_indexof(array: PyExpr, element: PyExpr, index: Option) -> PyExpr { } #[pyfunction] -#[pyo3(signature = (array, begin, end, stride = 1))] -fn array_slice(array: PyExpr, begin: PyExpr, end: PyExpr, stride: Option) -> PyExpr { - let stride = ScalarValue::Int64(stride); - let stride = Expr::Literal(stride); - datafusion_functions_array::expr_fn::array_slice(array.into(), begin.into(), end.into(), stride) - .into() +#[pyo3(signature = (array, begin, end, stride = None))] +fn array_slice(array: PyExpr, begin: PyExpr, end: PyExpr, stride: Option) -> PyExpr { + datafusion_functions_array::expr_fn::array_slice( + array.into(), + begin.into(), + end.into(), + stride.map(Into::into), + ) + .into() } #[pyfunction] -#[pyo3(signature = (array, begin, end, stride = 1))] -fn list_slice(array: PyExpr, begin: PyExpr, end: PyExpr, stride: Option) -> PyExpr { +#[pyo3(signature = (array, begin, end, stride = None))] +fn list_slice(array: PyExpr, begin: PyExpr, end: PyExpr, stride: Option) -> PyExpr { // alias of array_slice array_slice(array, begin, end, stride) } @@ -176,6 +231,28 @@ fn concat_ws(sep: String, args: Vec) -> PyResult { Ok(functions::string::expr_fn::concat_ws(lit(sep), args).into()) } +#[pyfunction] +#[pyo3(signature = (values, regex, flags = None))] +fn regexp_match(values: PyExpr, regex: PyExpr, flags: Option) -> PyResult { + Ok(functions::expr_fn::regexp_match(values.expr, regex.expr, flags.map(|x| x.expr)).into()) +} + +#[pyfunction] +/// Replaces substring(s) matching a POSIX regular expression. +fn regexp_replace( + string: PyExpr, + pattern: PyExpr, + replacement: PyExpr, + flags: Option, +) -> PyResult { + Ok(functions::expr_fn::regexp_replace( + string.into(), + pattern.into(), + replacement.into(), + flags.map(|x| x.expr), + ) + .into()) +} /// Creates a new Sort Expr #[pyfunction] fn order_by(expr: PyExpr, asc: Option, nulls_first: Option) -> PyResult { @@ -435,17 +512,11 @@ expr_fn!( "Returns x if x is not NaN otherwise returns y." ); expr_fn!(nullif, arg_1 arg_2); -expr_fn_vec!(octet_length, "Returns number of bytes in the string. Since this version of the function accepts type character directly, it will not strip trailing spaces."); +expr_fn!(octet_length, args, "Returns number of bytes in the string. Since this version of the function accepts type character directly, it will not strip trailing spaces."); expr_fn!(pi); expr_fn!(power, base exponent); expr_fn!(pow, power, base exponent); expr_fn!(radians, num); -expr_fn!(regexp_match, input_arg1 input_arg2); -expr_fn!( - regexp_replace, - arg1 arg2 arg3 arg4, - "Replaces substring(s) matching a POSIX regular expression." -); expr_fn!(repeat, string n, "Repeats string the specified number of times."); expr_fn!( replace, @@ -576,19 +647,14 @@ aggregate_function!(array_agg, ArrayAgg); aggregate_function!(avg, Avg); aggregate_function!(corr, Correlation); aggregate_function!(count, Count); -aggregate_function!(covar_pop, CovariancePop); aggregate_function!(grouping, Grouping); aggregate_function!(max, Max); aggregate_function!(mean, Avg); -aggregate_function!(median, Median); aggregate_function!(min, Min); -aggregate_function!(sum, Sum); aggregate_function!(stddev, Stddev); aggregate_function!(stddev_pop, StddevPop); aggregate_function!(stddev_samp, Stddev); -aggregate_function!(var, Variance); aggregate_function!(var_pop, VariancePop); -aggregate_function!(var_samp, Variance); aggregate_function!(regr_avgx, RegrAvgx); aggregate_function!(regr_avgy, RegrAvgy); aggregate_function!(regr_count, RegrCount); @@ -598,8 +664,6 @@ aggregate_function!(regr_slope, RegrSlope); aggregate_function!(regr_sxx, RegrSXX); aggregate_function!(regr_sxy, RegrSXY); aggregate_function!(regr_syy, RegrSYY); -aggregate_function!(first_value, FirstValue); -aggregate_function!(last_value, LastValue); aggregate_function!(bit_and, BitAnd); aggregate_function!(bit_or, BitOr); aggregate_function!(bit_xor, BitXor); diff --git a/src/physical_plan.rs b/src/physical_plan.rs index 51032f2d..c97c1a96 100644 --- a/src/physical_plan.rs +++ b/src/physical_plan.rs @@ -40,7 +40,7 @@ impl PyExecutionPlan { self.plan .children() .iter() - .map(|p| p.to_owned().into()) + .map(|&p| p.to_owned().into()) .collect() }