diff --git a/Cargo.lock b/Cargo.lock index 97bffdb060ea..e332fe6e1a87 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,12 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + [[package]] name = "adler32" version = "1.2.0" @@ -163,9 +169,9 @@ checksum = "9d151e35f61089500b617991b791fc8bfd237ae50cd5950803758a179b41e67a" [[package]] name = "arrayvec" -version = "0.7.4" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow-array" @@ -251,7 +257,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.76", ] [[package]] @@ -262,7 +268,7 @@ checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.76", ] [[package]] @@ -412,9 +418,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.37.0" +version = "1.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1074e818fbe4f9169242d78448b15be8916a79daa38ea1231f2e2e10d993fcd2" +checksum = "11822090cf501c316c6f75711d77b96fba30658e3867a7762e5e2f5d32d31e81" dependencies = [ "aws-credential-types", "aws-runtime", @@ -434,9 +440,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.38.0" +version = "1.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29755c51e33fa3f678598f64324a169cf4b7d3c4865d2709d4308f53366a92a4" +checksum = "78a2a06ff89176123945d1bbe865603c4d7101bea216a550bb4d2e4e9ba74d74" dependencies = [ "aws-credential-types", "aws-runtime", @@ -456,9 +462,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.37.0" +version = "1.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e52dc3fd7dfa6c01a69cf3903e00aa467261639138a05b06cd92314d2c8fb07" +checksum = "a20a91795850826a6f456f4a48eff1dfa59a0e69bdbf5b8c50518fd372106574" dependencies = [ "aws-credential-types", "aws-runtime", @@ -591,9 +597,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.6.2" +version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce87155eba55e11768b8c1afa607f3e864ae82f03caf63258b37455b0ad02537" +checksum = "0abbf454960d0db2ad12684a1640120e7557294b0ff8e2f11236290a1b293225" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -635,9 +641,9 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.2.0" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfe321a6b21f5d8eabd0ade9c55d3d0335f3c3157fc2b3e87f05f34b539e4df5" +checksum = "6cee7cadb433c781d3299b916fbf620fea813bf38f49db282fb6858141a05cc8" dependencies = [ "base64-simd", "bytes", @@ -692,7 +698,7 @@ dependencies = [ "cc", "cfg-if", "libc", - "miniz_oxide", + "miniz_oxide 0.7.4", "object", "rustc-demangle", ] @@ -751,9 +757,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.5.3" +version = "1.5.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9ec96fe9a81b5e365f9db71fe00edc4fe4ca2cc7dcb7861f0603012a7caa210" +checksum = "d82033247fd8e890df8f740e407ad4d038debb9eb1f40533fffb32e7d17dc6f7" dependencies = [ "arrayref", "arrayvec", @@ -820,22 +826,22 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "bytemuck" -version = "1.16.3" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "102087e286b4677862ea56cf8fc58bb2cdfa8725c40ffb80fe3a008eb7f2fc83" +checksum = "6fd4c6dcc3b0aea2f5c0b4b82c2b15fe39ddbc76041a310848f4706edf76bb31" dependencies = [ "bytemuck_derive", ] [[package]] name = "bytemuck_derive" -version = "1.7.0" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ee891b04274a59bd38b412188e24b849617b2e45a0fd8d057deb63e7403761b" +checksum = "0cc8b54b395f2fcfbb3d90c47b01c7f444d94d05bdeb775811dec868ac3bbc26" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.76", ] [[package]] @@ -889,12 +895,13 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.1.10" +version = "1.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9e8aabfac534be767c909e0690571677d49f41bd8465ae876fe043d52ba5292" +checksum = "50d2eb3cd3d1bf4529e31c215ee6f93ec5a3d536d9f578f93d9d33ee19562932" dependencies = [ "jobserver", "libc", + "shlex", ] [[package]] @@ -967,9 +974,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.15" +version = "4.5.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11d8838454fda655dafd3accb2b6e2bea645b9e4078abe84a22ceb947235c5cc" +checksum = "ed6719fffa43d0d87e5fd8caeab59be1554fb028cd30edc88fc4369b17971019" dependencies = [ "clap_builder", ] @@ -1001,9 +1008,9 @@ dependencies = [ [[package]] name = "cmake" -version = "0.1.50" +version = "0.1.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130" +checksum = "fb1e43aa7fd152b1f968787f7dbcdeb306d1867ff373c69955211876c053f91a" dependencies = [ "cc", ] @@ -1356,7 +1363,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.76", ] [[package]] @@ -1411,9 +1418,9 @@ checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c" [[package]] name = "fastrand" -version = "2.1.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" +checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" [[package]] name = "ff" @@ -1427,13 +1434,13 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.31" +version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f211bbe8e69bbd0cfdea405084f128ae8b4aaa6b0b522fc8f2b009084797920" +checksum = "324a1be68054ef05ad64b861cc9eaf1d623d2d8cb25b4bf2cb9cdd902b4bf253" dependencies = [ "crc32fast", "libz-ng-sys", - "miniz_oxide", + "miniz_oxide 0.8.0", ] [[package]] @@ -1532,7 +1539,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.76", ] [[package]] @@ -1655,9 +1662,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.4.5" +version = "0.4.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa82e28a107a8cc405f0839610bdc9b15f1e25ec7d696aa5cf173edbcb1486ab" +checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205" dependencies = [ "atomic-waker", "bytes", @@ -1729,6 +1736,12 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" +[[package]] +name = "hermit-abi" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" + [[package]] name = "hex" version = "0.4.3" @@ -1860,7 +1873,7 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "h2 0.4.5", + "h2 0.4.6", "http 1.1.0", "http-body 1.0.1", "httparse", @@ -1898,7 +1911,7 @@ dependencies = [ "hyper 1.4.1", "hyper-util", "rustls 0.23.12", - "rustls-native-certs 0.7.1", + "rustls-native-certs 0.7.2", "rustls-pki-types", "tokio", "tokio-rustls 0.26.0", @@ -1960,9 +1973,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.3.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de3fc2e30ba82dd1b3911c8de1ffc143c74a914a14e99514d7637e3099df5ea0" +checksum = "93ead53efc7ea8ed3cfb0c79fc8023fbb782a5432b52830b6518941cebe6505c" dependencies = [ "equivalent", "hashbrown", @@ -1989,11 +2002,11 @@ checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" [[package]] name = "is-terminal" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" +checksum = "261f68e344040fbd0edea105bef17c66edf46f984ddb1115b775ce31be948f4b" dependencies = [ - "hermit-abi", + "hermit-abi 0.4.0", "libc", "windows-sys 0.52.0", ] @@ -2149,9 +2162,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.155" +version = "0.2.158" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" [[package]] name = "libflate" @@ -2237,9 +2250,9 @@ dependencies = [ [[package]] name = "libz-ng-sys" -version = "1.1.15" +version = "1.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6409efc61b12687963e602df8ecf70e8ddacf95bc6576bcf16e3ac6328083c5" +checksum = "4436751a01da56f1277f323c80d584ffad94a3d14aecd959dd0dff75aa73a438" dependencies = [ "cmake", "libc", @@ -2247,9 +2260,9 @@ dependencies = [ [[package]] name = "libz-sys" -version = "1.1.19" +version = "1.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdc53a7799a7496ebc9fd29f31f7df80e83c9bda5299768af5f9e59eeea74647" +checksum = "d2d16453e800a8cf6dd2fc3eb4bc99b786a9b90c663b8559a5b1a041bf89e472" dependencies = [ "cc", "libc", @@ -2385,13 +2398,22 @@ dependencies = [ "adler", ] +[[package]] +name = "miniz_oxide" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +dependencies = [ + "adler2", +] + [[package]] name = "mio" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" dependencies = [ - "hermit-abi", + "hermit-abi 0.3.9", "libc", "wasi", "windows-sys 0.52.0", 
@@ -2821,7 +2843,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.76", ] [[package]] @@ -3106,6 +3128,7 @@ dependencies = [ "fs4", "futures", "glob", + "hashbrown", "home", "itoa", "memchr", @@ -3405,6 +3428,8 @@ dependencies = [ "atomic-waker", "crossbeam-deque", "crossbeam-utils", + "futures", + "memmap2", "parking_lot", "pin-project-lite", "polars-core", @@ -3412,6 +3437,7 @@ dependencies = [ "polars-expr", "polars-io", "polars-mem-engine", + "polars-parquet", "polars-plan", "polars-utils", "rand", @@ -3451,6 +3477,7 @@ dependencies = [ "bytes", "hashbrown", "indexmap", + "libc", "memmap2", "num-traits", "once_cell", @@ -3458,6 +3485,7 @@ dependencies = [ "rand", "raw-cpuid", "rayon", + "serde", "smartstring", "stacker", "sysinfo", @@ -3603,7 +3631,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.74", + "syn 2.0.76", ] [[package]] @@ -3616,7 +3644,7 @@ dependencies = [ "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.74", + "syn 2.0.76", ] [[package]] @@ -3696,9 +3724,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.36" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" dependencies = [ "proc-macro2", ] @@ -3814,7 +3842,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.74", + "syn 2.0.76", ] [[package]] @@ -3843,7 +3871,7 @@ checksum = "bcc303e793d3734489387d205e9b186fac9c6cfacedd98cbb2e8a5943595f3e6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.76", ] [[package]] @@ -3889,16 +3917,16 @@ checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" [[package]] name = "reqwest" -version = "0.12.5" +version = "0.12.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7d6d2a27d57148378eb5e111173f4276ad26340ecc5c49a4a2152167a2d6a37" +checksum = "f8f4955649ef5c38cc7f9e8aa41761d48fb9677197daea9984dc54f56aad5e63" dependencies = [ "base64 0.22.1", "bytes", "futures-channel", "futures-core", "futures-util", - "h2 0.4.5", + "h2 0.4.6", "http 1.1.0", "http-body 1.0.1", "http-body-util", @@ -3914,7 +3942,7 @@ dependencies = [ "pin-project-lite", "quinn", "rustls 0.23.12", - "rustls-native-certs 0.7.1", + "rustls-native-certs 0.7.2", "rustls-pemfile 2.1.3", "rustls-pki-types", "serde", @@ -3930,7 +3958,7 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", - "winreg", + "windows-registry", ] [[package]] @@ -4045,9 +4073,9 @@ dependencies = [ [[package]] name = "rustls-native-certs" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a88d6d420651b496bdd98684116959239430022a115c1240e6c3993be0b15fba" +checksum = "04182dffc9091a404e0fc069ea5cd60e5b866c3adf881eff99a32d048242dffa" dependencies = [ "openssl-probe", "rustls-pemfile 2.1.3", @@ -4242,29 +4270,29 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.207" +version = "1.0.209" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5665e14a49a4ea1b91029ba7d3bca9f299e1f7cfa194388ccc20f14743e784f2" +checksum = "99fce0ffe7310761ca6bf9faf5115afbc19688edd00171d81b1bb1b116c63e09" dependencies = [ 
"serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.207" +version = "1.0.209" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6aea2634c86b0e8ef2cfdc0c340baede54ec27b1e46febd7f80dffb2aa44a00e" +checksum = "a5831b979fd7b5439637af1752d535ff49f4860c0f341d1baeb6faf0f4242170" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.76", ] [[package]] name = "serde_json" -version = "1.0.124" +version = "1.0.127" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66ad62847a56b3dba58cc891acd13884b9c61138d330c0d7b6181713d4fce38d" +checksum = "8043c06d9f82bd7271361ed64f415fe5e12a77fdb52e573e7f06a516dea329ad" dependencies = [ "indexmap", "itoa", @@ -4316,6 +4344,12 @@ dependencies = [ "digest", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "signal-hook-registry" version = "1.4.2" @@ -4466,15 +4500,15 @@ dependencies = [ [[package]] name = "stacker" -version = "0.1.15" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce" +checksum = "95a5daa25ea337c85ed954c0496e3bdd2c7308cc3b24cf7b50d04876654c579f" dependencies = [ "cc", "cfg-if", "libc", "psm", - "winapi", + "windows-sys 0.36.1", ] [[package]] @@ -4526,7 +4560,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.74", + "syn 2.0.76", ] [[package]] @@ -4539,7 +4573,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.74", + "syn 2.0.76", ] [[package]] @@ -4561,9 +4595,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.74" +version = "2.0.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fceb41e3d546d0bd83421d3409b1460cc7444cd389341a4c880fe7a042cb3d7" +checksum = "578e081a14e0cefc3279b0472138c513f37b41a08d5a3cca9b6e4e8ceb6cd525" dependencies = [ "proc-macro2", "quote", @@ -4575,12 +4609,15 @@ name = "sync_wrapper" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" +dependencies = [ + "futures-core", +] [[package]] name = "sysinfo" -version = "0.31.2" +version = "0.31.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4115055da5f572fff541dd0c4e61b0262977f453cc9fe04be83aba25a89bdab" +checksum = "2b92e0bdf838cbc1c4c9ba14f9c97a7ec6cdcd1ae66b10e1e42775a25553f45d" dependencies = [ "core-foundation-sys", "libc", @@ -4631,7 +4668,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.76", ] [[package]] @@ -4700,9 +4737,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.39.2" +version = "1.39.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daa4fb1bc778bd6f04cbfc4bb2d06a7396a8f299dc33ea1900cedaa316f467b1" +checksum = "9babc99b9923bfa4804bd74722ff02c0381021eafa4db9949217e3be8e84fff5" dependencies = [ "backtrace", "bytes", @@ -4723,7 +4760,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.76", ] [[package]] @@ -4812,15 +4849,15 @@ dependencies = [ [[package]] name = "tower-layer" -version = 
"0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" [[package]] name = "tower-service" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" @@ -4841,7 +4878,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.76", ] [[package]] @@ -4886,7 +4923,7 @@ checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.76", ] [[package]] @@ -5059,7 +5096,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.76", "wasm-bindgen-shared", ] @@ -5093,7 +5130,7 @@ checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.76", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -5185,7 +5222,7 @@ checksum = "d2ed2439a290666cd67ecce2b0ffaad89c2a56b976b736e6ece670297897832d" dependencies = [ "windows-implement", "windows-interface", - "windows-result", + "windows-result 0.1.2", "windows-targets 0.52.6", ] @@ -5197,7 +5234,7 @@ checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.76", ] [[package]] @@ -5208,7 +5245,18 @@ checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.76", +] + +[[package]] +name = "windows-registry" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" +dependencies = [ + "windows-result 0.2.0", + "windows-strings", + "windows-targets 0.52.6", ] [[package]] @@ -5220,13 +5268,36 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-result" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +dependencies = [ + "windows-result 0.2.0", + "windows-targets 0.52.6", +] + [[package]] name = "windows-sys" -version = "0.48.0" +version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" dependencies = [ - "windows-targets 0.48.5", + "windows_aarch64_msvc 0.36.1", + "windows_i686_gnu 0.36.1", + "windows_i686_msvc 0.36.1", + "windows_x86_64_gnu 0.36.1", + "windows_x86_64_msvc 0.36.1", ] [[package]] @@ -5290,6 +5361,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_msvc" 
+version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -5302,6 +5379,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_i686_gnu" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -5320,6 +5403,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -5332,6 +5421,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_x86_64_gnu" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -5356,6 +5451,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -5377,16 +5478,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "winreg" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" -dependencies = [ - "cfg-if", - "windows-sys 0.48.0", -] - [[package]] name = "x11rb" version = "0.13.1" @@ -5434,7 +5525,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.76", ] [[package]] diff --git a/README.md b/README.md index 5df1673c0dfe..5d7e3c3bf203 100644 --- a/README.md +++ b/README.md @@ -217,7 +217,7 @@ improvements point to the `main` branch of this repo. polars = { git = "https://github.com/pola-rs/polars", rev = "" } ``` -Requires Rust version `>=1.79`. +Requires Rust version `>=1.80`. 
## Contributing diff --git a/crates/polars-arrow/src/array/binary/mutable.rs b/crates/polars-arrow/src/array/binary/mutable.rs index 53a8ed32bb6f..65d1ca928b75 100644 --- a/crates/polars-arrow/src/array/binary/mutable.rs +++ b/crates/polars-arrow/src/array/binary/mutable.rs @@ -442,9 +442,8 @@ impl> TryPush> for MutableBinaryArray { Some(value) => { self.values.try_push(value.as_ref())?; - match &mut self.validity { - Some(validity) => validity.push(true), - None => {}, + if let Some(validity) = &mut self.validity { + validity.push(true) } }, None => { diff --git a/crates/polars-arrow/src/array/binview/mutable.rs b/crates/polars-arrow/src/array/binview/mutable.rs index 3258f18052e3..b7b4aac24ef9 100644 --- a/crates/polars-arrow/src/array/binview/mutable.rs +++ b/crates/polars-arrow/src/array/binview/mutable.rs @@ -573,6 +573,128 @@ impl MutableBinaryViewArray<[u8]> { } Ok(()) } + + /// Extend from a `buffer` and `length` of items given some statistics about the lengths. + /// + /// This will attempt to dispatch to several optimized implementations. + /// + /// # Safety + /// + /// This is safe if the statistics are correct. + pub unsafe fn extend_from_lengths_with_stats( + &mut self, + buffer: &[u8], + lengths_iterator: impl Clone + ExactSizeIterator, + min_length: usize, + max_length: usize, + sum_length: usize, + ) { + let num_items = lengths_iterator.len(); + + if num_items == 0 { + return; + } + + #[cfg(debug_assertions)] + { + let (min, max, sum) = lengths_iterator.clone().map(|v| (v, v, v)).fold( + (usize::MAX, usize::MIN, 0usize), + |(cmin, cmax, csum), (emin, emax, esum)| { + (cmin.min(emin), cmax.max(emax), csum + esum) + }, + ); + + assert_eq!(min, min_length); + assert_eq!(max, max_length); + assert_eq!(sum, sum_length); + } + + assert!(sum_length <= buffer.len()); + + let mut buffer_offset = 0; + if min_length > View::MAX_INLINE_SIZE as usize + && (num_items == 1 || sum_length + self.in_progress_buffer.len() <= u32::MAX as usize) + { + let buffer_idx = self.completed_buffers().len() as u32; + let in_progress_buffer_offset = self.in_progress_buffer.len(); + + self.in_progress_buffer + .extend_from_slice(&buffer[..sum_length]); + self.views.extend(lengths_iterator.map(|length| { + // SAFETY: We asserted before that the sum of all lengths is smaller or equal to + // the buffer length. + let view_buffer = + unsafe { buffer.get_unchecked(buffer_offset..buffer_offset + length) }; + + // SAFETY: We know that the minimum length > View::MAX_INLINE_SIZE. Therefore, this + // length is > View::MAX_INLINE_SIZE. + let view = unsafe { + View::new_noninline_unchecked( + view_buffer, + buffer_idx, + (buffer_offset + in_progress_buffer_offset) as u32, + ) + }; + buffer_offset += length; + view + })); + } else if max_length <= View::MAX_INLINE_SIZE as usize { + // If the min and max are the same, we can dispatch to the optimized SIMD + // implementation. + if min_length == max_length { + let length = min_length; + if length == 0 { + self.views + .resize(self.views.len() + num_items, View::new_inline(&[])); + } else { + View::extend_with_inlinable_strided( + &mut self.views, + &buffer[..length * num_items], + length as u8, + ); + } + } else { + self.views.extend(lengths_iterator.map(|length| { + // SAFETY: We asserted before that the sum of all lengths is smaller or equal + // to the buffer length. 
+ let view_buffer = + unsafe { buffer.get_unchecked(buffer_offset..buffer_offset + length) }; + + // SAFETY: We know that each view has a length <= View::MAX_INLINE_SIZE because + // the maximum length is <= View::MAX_INLINE_SIZE + let view = unsafe { View::new_inline_unchecked(view_buffer) }; + buffer_offset += length; + view + })); + } + } else { + // If all fails, just fall back to a base implementation. + self.reserve(num_items); + for length in lengths_iterator { + let value = &buffer[buffer_offset..buffer_offset + length]; + buffer_offset += length; + self.push_value(value); + } + } + } + + /// Extend from a `buffer` and `length` of items. + /// + /// This will attempt to dispatch to several optimized implementations. + #[inline] + pub fn extend_from_lengths( + &mut self, + buffer: &[u8], + lengths_iterator: impl Clone + ExactSizeIterator, + ) { + let (min, max, sum) = lengths_iterator.clone().map(|v| (v, v, v)).fold( + (usize::MAX, 0usize, 0usize), + |(cmin, cmax, csum), (emin, emax, esum)| (cmin.min(emin), cmax.max(emax), csum + esum), + ); + + // SAFETY: We just collected the right stats. + unsafe { self.extend_from_lengths_with_stats(buffer, lengths_iterator, min, max, sum) } + } } impl> Extend> for MutableBinaryViewArray { @@ -646,3 +768,54 @@ impl> TryPush> for MutableBinaryView Ok(()) } } + +#[cfg(test)] +mod tests { + use super::*; + + fn roundtrip(values: &[&[u8]]) -> bool { + let buffer = values + .iter() + .flat_map(|v| v.iter().copied()) + .collect::>(); + let lengths = values.iter().map(|v| v.len()).collect::>(); + let mut bv = MutableBinaryViewArray::<[u8]>::with_capacity(values.len()); + + bv.extend_from_lengths(&buffer[..], lengths.into_iter()); + + &bv.values_iter().collect::>()[..] == values + } + + #[test] + fn extend_with_lengths_basic() { + assert!(roundtrip(&[])); + assert!(roundtrip(&[b"abc"])); + assert!(roundtrip(&[ + b"a_very_very_long_string_that_is_not_inlinable" + ])); + assert!(roundtrip(&[ + b"abc", + b"a_very_very_long_string_that_is_not_inlinable" + ])); + } + + #[test] + fn extend_with_inlinable_fastpath() { + assert!(roundtrip(&[b"abc", b"defg", b"hix"])); + assert!(roundtrip(&[b"abc", b"defg", b"hix", b"xyza1234abcd"])); + } + + #[test] + fn extend_with_inlinable_eq_len_fastpath() { + assert!(roundtrip(&[b"abc", b"def", b"hix"])); + assert!(roundtrip(&[b"abc", b"def", b"hix", b"xyz"])); + } + + #[test] + fn extend_with_not_inlinable_fastpath() { + assert!(roundtrip(&[ + b"a_very_long_string123", + b"a_longer_string_than_the_previous" + ])); + } +} diff --git a/crates/polars-arrow/src/array/binview/view.rs b/crates/polars-arrow/src/array/binview/view.rs index 6542e2c9761b..d0cd8cd36eda 100644 --- a/crates/polars-arrow/src/array/binview/view.rs +++ b/crates/polars-arrow/src/array/binview/view.rs @@ -157,12 +157,12 @@ impl View { /// Extend a `Vec` with inline views slices of `src` with `width`. /// /// This tries to use SIMD to optimize the copying and can be massively faster than doing a - /// `views.extend(src.chunks_exact(stride).map(View::new_inline))`. + /// `views.extend(src.chunks_exact(width).map(View::new_inline))`. /// /// # Panics /// - /// This function panics if `src.len()` is not divisible by `width` or if `width > - /// View::MAX_INLINE_SIZE`. + /// This function panics if `src.len()` is not divisible by `width`, `width > + /// View::MAX_INLINE_SIZE` or `width == 0`. pub fn extend_with_inlinable_strided(views: &mut Vec, src: &[u8], width: u8) { macro_rules! dispatch { ($n:ident = $match:ident in [$($v:literal),+ $(,)?] 
=> $block:block, otherwise = $otherwise:expr) => { @@ -180,17 +180,16 @@ impl View { } let width = width as usize; - assert_eq!(src.len() % width, 0); + + assert!(width > 0); assert!(width <= View::MAX_INLINE_SIZE as usize); + + assert_eq!(src.len() % width, 0); + let num_values = src.len() / width; views.reserve(num_values); - if width == 0 { - views.resize(views.len() + num_values, View::new_inline(&[])); - return; - } - #[allow(unused_mut)] let mut src = src; diff --git a/crates/polars-arrow/src/array/boolean/mutable.rs b/crates/polars-arrow/src/array/boolean/mutable.rs index 80d689806f1d..7f97f82762b0 100644 --- a/crates/polars-arrow/src/array/boolean/mutable.rs +++ b/crates/polars-arrow/src/array/boolean/mutable.rs @@ -101,9 +101,8 @@ impl MutableBooleanArray { #[inline] pub fn push_value(&mut self, value: bool) { self.values.push(value); - match &mut self.validity { - Some(validity) => validity.push(true), - None => {}, + if let Some(validity) = &mut self.validity { + validity.push(true) } } diff --git a/crates/polars-arrow/src/array/fixed_size_binary/mutable.rs b/crates/polars-arrow/src/array/fixed_size_binary/mutable.rs index 8f81ce86f6d8..1c744dbe88fd 100644 --- a/crates/polars-arrow/src/array/fixed_size_binary/mutable.rs +++ b/crates/polars-arrow/src/array/fixed_size_binary/mutable.rs @@ -114,9 +114,8 @@ impl MutableFixedSizeBinaryArray { } self.values.extend_from_slice(bytes); - match &mut self.validity { - Some(validity) => validity.push(true), - None => {}, + if let Some(validity) = &mut self.validity { + validity.push(true) } }, None => { diff --git a/crates/polars-arrow/src/array/mod.rs b/crates/polars-arrow/src/array/mod.rs index c2c0c958032d..49e086853a37 100644 --- a/crates/polars-arrow/src/array/mod.rs +++ b/crates/polars-arrow/src/array/mod.rs @@ -195,6 +195,7 @@ pub trait Array: Send + Sync + dyn_clone::DynClone + 'static { dyn_clone::clone_trait_object!(Array); /// A trait describing a mutable array; i.e. an array whose values can be changed. +/// /// Mutable arrays cannot be cloned but can be mutated in place, /// thereby making them useful to perform numeric operations without allocations. /// As in [`Array`], concrete arrays (such as [`MutablePrimitiveArray`]) implement how they are mutated. @@ -370,6 +371,7 @@ pub fn new_empty_array(data_type: ArrowDataType) -> Box { } /// Creates a new [`Array`] of [`ArrowDataType`] `data_type` and `length`. +/// /// The array is guaranteed to have [`Array::null_count`] equal to [`Array::len`] /// for all types except Union, which does not have a validity. 
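Stepping back to the `extend_from_lengths` / `extend_from_lengths_with_stats` helpers added to `MutableBinaryViewArray` above: the safe wrapper derives the min/max/sum statistics itself and then dispatches between the non-inlinable single-buffer path, the fixed-width inline SIMD path, the mixed inline path and the plain `push_value` fallback. A minimal usage sketch mirroring the `roundtrip` test in this diff; the import path and the `Item = usize` bound on the lengths iterator are assumed from context:

```rust
use polars_arrow::array::MutableBinaryViewArray;

fn main() {
    // Values packed back-to-back in one buffer, plus their lengths.
    let values: [&[u8]; 3] = [b"abc", b"defg", b"a_much_longer_value_that_is_not_inlinable"];
    let buffer: Vec<u8> = values.iter().flat_map(|v| v.iter().copied()).collect();
    let lengths: Vec<usize> = values.iter().map(|v| v.len()).collect();

    let mut bv = MutableBinaryViewArray::<[u8]>::with_capacity(values.len());
    // The safe entry point computes the length statistics internally before
    // picking one of the optimized extend paths described above.
    bv.extend_from_lengths(&buffer[..], lengths.iter().copied());

    assert_eq!(bv.values_iter().collect::<Vec<_>>(), values);
}
```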
pub fn new_null_array(data_type: ArrowDataType, length: usize) -> Box { diff --git a/crates/polars-arrow/src/array/primitive/mutable.rs b/crates/polars-arrow/src/array/primitive/mutable.rs index ae2025482f2c..53565dda831a 100644 --- a/crates/polars-arrow/src/array/primitive/mutable.rs +++ b/crates/polars-arrow/src/array/primitive/mutable.rs @@ -130,9 +130,8 @@ impl MutablePrimitiveArray { #[inline] pub fn push_value(&mut self, value: T) { self.values.push(value); - match &mut self.validity { - Some(validity) => validity.push(true), - None => {}, + if let Some(validity) = &mut self.validity { + validity.push(true) } } diff --git a/crates/polars-arrow/src/array/utf8/mutable.rs b/crates/polars-arrow/src/array/utf8/mutable.rs index ef9a5e8527b7..af4845680428 100644 --- a/crates/polars-arrow/src/array/utf8/mutable.rs +++ b/crates/polars-arrow/src/array/utf8/mutable.rs @@ -522,9 +522,8 @@ impl> TryPush> for MutableUtf8Array { Some(value) => { self.values.try_push(value.as_ref())?; - match &mut self.validity { - Some(validity) => validity.push(true), - None => {}, + if let Some(validity) = &mut self.validity { + validity.push(true) } }, None => { diff --git a/crates/polars-arrow/src/bitmap/utils/slice_iterator.rs b/crates/polars-arrow/src/bitmap/utils/slice_iterator.rs index dc388f1d41b5..f3083ad0b141 100644 --- a/crates/polars-arrow/src/bitmap/utils/slice_iterator.rs +++ b/crates/polars-arrow/src/bitmap/utils/slice_iterator.rs @@ -9,7 +9,8 @@ enum State { Finished, } -/// Iterator over a bitmap that returns slices of set regions +/// Iterator over a bitmap that returns slices of set regions. +/// /// This is the most efficient method to extract slices of values from arrays /// with a validity bitmap. /// For example, the bitmap `00101111` returns `[(0,4), (6,1)]` diff --git a/crates/polars-arrow/src/compute/arity.rs b/crates/polars-arrow/src/compute/arity.rs index e590e7b1974b..22ac733c2839 100644 --- a/crates/polars-arrow/src/compute/arity.rs +++ b/crates/polars-arrow/src/compute/arity.rs @@ -8,10 +8,10 @@ use crate::bitmap::{Bitmap, MutableBitmap}; use crate::datatypes::ArrowDataType; use crate::types::NativeType; -/// Applies an unary and infallible function to a [`PrimitiveArray`]. This is the -/// fastest way to perform an operation on a [`PrimitiveArray`] when the benefits -/// of a vectorized operation outweighs the cost of branching nulls and -/// non-nulls. +/// Applies an unary and infallible function to a [`PrimitiveArray`]. +/// +/// This is the /// fastest way to perform an operation on a [`PrimitiveArray`] when the benefits +/// of a vectorized operation outweighs the cost of branching nulls and non-nulls. /// /// # Implementation /// This will apply the function for all values, including those on null slots. @@ -131,11 +131,14 @@ where PrimitiveArray::::new(data_type, values, validity) } -/// Applies a binary operations to two primitive arrays. This is the fastest -/// way to perform an operation on two primitive array when the benefits of a +/// Applies a binary operations to two primitive arrays. +/// +/// This is the fastest way to perform an operation on two primitive array when the benefits of a /// vectorized operation outweighs the cost of branching nulls and non-nulls. +/// /// # Errors /// This function errors iff the arrays have a different length. +/// /// # Implementation /// This will apply the function for all values, including those on null slots. 
/// This implies that the operation must be infallible for any value of the diff --git a/crates/polars-arrow/src/compute/temporal.rs b/crates/polars-arrow/src/compute/temporal.rs index 1198c04bb152..437089b72891 100644 --- a/crates/polars-arrow/src/compute/temporal.rs +++ b/crates/polars-arrow/src/compute/temporal.rs @@ -75,12 +75,14 @@ macro_rules! date_like { } /// Extracts the years of a temporal array as [`PrimitiveArray`]. +/// /// Use [`can_year`] to check if this operation is supported for the target [`ArrowDataType`]. pub fn year(array: &dyn Array) -> PolarsResult> { date_like!(year, array, ArrowDataType::Int32) } /// Extracts the months of a temporal array as [`PrimitiveArray`]. +/// /// Value ranges from 1 to 12. /// Use [`can_month`] to check if this operation is supported for the target [`ArrowDataType`]. pub fn month(array: &dyn Array) -> PolarsResult> { @@ -88,6 +90,7 @@ pub fn month(array: &dyn Array) -> PolarsResult> { } /// Extracts the days of a temporal array as [`PrimitiveArray`]. +/// /// Value ranges from 1 to 32 (Last day depends on month). /// Use [`can_day`] to check if this operation is supported for the target [`ArrowDataType`]. pub fn day(array: &dyn Array) -> PolarsResult> { @@ -95,13 +98,15 @@ pub fn day(array: &dyn Array) -> PolarsResult> { } /// Extracts weekday of a temporal array as [`PrimitiveArray`]. +/// /// Monday is 1, Tuesday is 2, ..., Sunday is 7. /// Use [`can_weekday`] to check if this operation is supported for the target [`ArrowDataType`] pub fn weekday(array: &dyn Array) -> PolarsResult> { date_like!(i8_weekday, array, ArrowDataType::Int8) } -/// Extracts ISO week of a temporal array as [`PrimitiveArray`] +/// Extracts ISO week of a temporal array as [`PrimitiveArray`]. +/// /// Value ranges from 1 to 53 (Last week depends on the year). /// Use [`can_iso_week`] to check if this operation is supported for the target [`ArrowDataType`] pub fn iso_week(array: &dyn Array) -> PolarsResult> { @@ -161,6 +166,7 @@ pub fn second(array: &dyn Array) -> PolarsResult> { } /// Extracts the nanoseconds of a temporal array as [`PrimitiveArray`]. +/// /// Value ranges from 0 to 1_999_999_999. /// The range from 1_000_000_000 to 1_999_999_999 represents the leap second. /// Use [`can_nanosecond`] to check if this operation is supported for the target [`ArrowDataType`]. diff --git a/crates/polars-arrow/src/datatypes/physical_type.rs b/crates/polars-arrow/src/datatypes/physical_type.rs index 31693cefd4bd..174c0401ca3f 100644 --- a/crates/polars-arrow/src/datatypes/physical_type.rs +++ b/crates/polars-arrow/src/datatypes/physical_type.rs @@ -4,6 +4,7 @@ use serde::{Deserialize, Serialize}; pub use crate::types::PrimitiveType; /// The set of physical types: unique in-memory representations of an Arrow array. +/// /// A physical type has a one-to-many relationship with a [`crate::datatypes::ArrowDataType`] and /// a one-to-one mapping to each struct in this crate that implements [`crate::array::Array`]. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] diff --git a/crates/polars-arrow/src/legacy/kernels/take_agg/var.rs b/crates/polars-arrow/src/legacy/kernels/take_agg/var.rs index 8fd54d712e94..62e2ba1353f2 100644 --- a/crates/polars-arrow/src/legacy/kernels/take_agg/var.rs +++ b/crates/polars-arrow/src/legacy/kernels/take_agg/var.rs @@ -1,6 +1,7 @@ use super::*; -/// Numerical stable online variance aggregation +/// Numerical stable online variance aggregation. +/// /// See: /// Welford, B. P. (1962). 
"Note on a method for calculating corrected sums of squares and products". /// Technometrics. 4 (3): 419–420. doi:10.2307/1266577. JSTOR 1266577. diff --git a/crates/polars-arrow/src/offset.rs b/crates/polars-arrow/src/offset.rs index 33b3058cbb78..ae4583dfe6f4 100644 --- a/crates/polars-arrow/src/offset.rs +++ b/crates/polars-arrow/src/offset.rs @@ -518,6 +518,14 @@ impl OffsetsBuffer { pub fn into_inner(self) -> Buffer { self.0 } + + /// Returns the offset difference between `start` and `end`. + #[inline] + pub fn delta(&self, start: usize, end: usize) -> usize { + assert!(start <= end); + + (self.0[end + 1] - self.0[start]).to_usize() + } } impl From<&OffsetsBuffer> for OffsetsBuffer { diff --git a/crates/polars-arrow/src/temporal_conversions.rs b/crates/polars-arrow/src/temporal_conversions.rs index b10eef9694c0..487996094f37 100644 --- a/crates/polars-arrow/src/temporal_conversions.rs +++ b/crates/polars-arrow/src/temporal_conversions.rs @@ -267,6 +267,7 @@ pub fn parse_offset(offset: &str) -> PolarsResult { } /// Parses `value` to `Option` consistent with the Arrow's definition of timestamp with timezone. +/// /// `tz` must be built from `timezone` (either via [`parse_offset`] or `chrono-tz`). /// Returns in scale `tz` of `TimeUnit`. #[inline] diff --git a/crates/polars-arrow/src/trusted_len.rs b/crates/polars-arrow/src/trusted_len.rs index 5f194770e7c4..359edfd1b88c 100644 --- a/crates/polars-arrow/src/trusted_len.rs +++ b/crates/polars-arrow/src/trusted_len.rs @@ -3,6 +3,7 @@ use std::iter::Scan; use std::slice::Iter; /// An iterator of known, fixed size. +/// /// A trait denoting Rusts' unstable [TrustedLen](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html). /// This is re-defined here and implemented for some iterators until `std::iter::TrustedLen` /// is stabilized. @@ -98,6 +99,14 @@ where } } +impl TrustMyLength>, J> { + /// Create a new `TrustMyLength` iterator that repeats `value` `len` times. + pub fn new_repeat_n(value: J, len: usize) -> Self { + // SAFETY: This is always safe since repeat(..).take(n) always repeats exactly `n` times`. + unsafe { Self::new(std::iter::repeat(value).take(len), len) } + } +} + impl Iterator for TrustMyLength where I: Iterator, diff --git a/crates/polars-arrow/src/types/bit_chunk.rs b/crates/polars-arrow/src/types/bit_chunk.rs index c618c5458515..be4445a5d77a 100644 --- a/crates/polars-arrow/src/types/bit_chunk.rs +++ b/crates/polars-arrow/src/types/bit_chunk.rs @@ -48,8 +48,10 @@ bit_chunk!(u16); bit_chunk!(u32); bit_chunk!(u64); -/// An [`Iterator`] over a [`BitChunk`]. This iterator is often -/// compiled to SIMD. +/// An [`Iterator`] over a [`BitChunk`]. +/// +/// This iterator is often compiled to SIMD. +/// /// The [LSB](https://en.wikipedia.org/wiki/Bit_numbering#Least_significant_bit) corresponds /// to the first slot, as defined by the arrow specification. 
/// # Example diff --git a/crates/polars-core/Cargo.toml b/crates/polars-core/Cargo.toml index 882392f080cf..204f022ff3ae 100644 --- a/crates/polars-core/Cargo.toml +++ b/crates/polars-core/Cargo.toml @@ -64,7 +64,7 @@ performant = ["arrow/performant", "reinterpret"] # extra utilities for StringChunked strings = ["regex", "arrow/strings", "polars-error/regex"] # support for ObjectChunked (downcastable Series of any type) -object = ["serde_json"] +object = ["serde_json", "algorithm_group_by"] fmt = ["comfy-table/tty"] fmt_no_tty = ["comfy-table"] @@ -93,9 +93,9 @@ diagonal_concat = [] dataframe_arithmetic = [] product = [] unique_counts = [] -partition_by = [] +partition_by = ["algorithm_group_by"] describe = [] -timezones = ["chrono-tz", "arrow/chrono-tz", "arrow/timezones"] +timezones = ["temporal", "chrono", "chrono-tz", "arrow/chrono-tz", "arrow/timezones"] dynamic_group_by = ["dtype-datetime", "dtype-date"] arrow_rs = ["arrow-array", "arrow/arrow_rs"] diff --git a/crates/polars-core/src/chunked_array/object/registry.rs b/crates/polars-core/src/chunked_array/object/registry.rs index 5ebcad2a022a..ef5febddad76 100644 --- a/crates/polars-core/src/chunked_array/object/registry.rs +++ b/crates/polars-core/src/chunked_array/object/registry.rs @@ -1,4 +1,5 @@ //! This is a heap allocated utility that can be used to register an object type. +//! //! That object type will know its own generic type parameter `T` and callers can simply //! send `&Any` values and don't have to know the generic type themselves. use std::any::Any; diff --git a/crates/polars-core/src/chunked_array/ops/mod.rs b/crates/polars-core/src/chunked_array/ops/mod.rs index c3d030447794..f946fce715e6 100644 --- a/crates/polars-core/src/chunked_array/ops/mod.rs +++ b/crates/polars-core/src/chunked_array/ops/mod.rs @@ -121,6 +121,7 @@ pub trait ChunkTakeUnchecked { } /// Create a `ChunkedArray` with new values by index or by boolean mask. +/// /// Note that these operations clone data. This is however the only way we can modify at mask or /// index level as the underlying Arrow arrays are immutable. pub trait ChunkSet<'a, A, B> { @@ -461,7 +462,7 @@ pub trait ChunkFilter { /// Create a new ChunkedArray filled with values at that index. pub trait ChunkExpandAtIndex { /// Create a new ChunkedArray filled with values at that index. - fn new_from_index(&self, length: usize, index: usize) -> ChunkedArray; + fn new_from_index(&self, index: usize, length: usize) -> ChunkedArray; } macro_rules! impl_chunk_expand { @@ -536,7 +537,7 @@ impl ChunkExpandAtIndex for ListChunked { #[cfg(feature = "dtype-struct")] impl ChunkExpandAtIndex for StructChunked { - fn new_from_index(&self, length: usize, index: usize) -> ChunkedArray { + fn new_from_index(&self, index: usize, length: usize) -> ChunkedArray { let (chunk_idx, idx) = self.index_to_chunked_index(index); let chunk = self.downcast_chunks().get(chunk_idx).unwrap(); let chunk = if chunk.is_null(idx) { diff --git a/crates/polars-core/src/chunked_array/ops/search_sorted.rs b/crates/polars-core/src/chunked_array/ops/search_sorted.rs index e31599429aae..5e97f0818176 100644 --- a/crates/polars-core/src/chunked_array/ops/search_sorted.rs +++ b/crates/polars-core/src/chunked_array/ops/search_sorted.rs @@ -38,8 +38,10 @@ where } /// Search through a series of chunks for the first position where f(x) is true, -/// assuming it is first always false and then always true. It repeats this for -/// each value in search_values. If the search value is null null_idx is returned. 
+/// assuming it is first always false and then always true. +/// +/// It repeats this for each value in search_values. If the search value is null null_idx is +/// returned. /// /// Assumes the chunks are non-empty. pub fn lower_bound_chunks<'a, T, F>( diff --git a/crates/polars-core/src/datatypes/_serde.rs b/crates/polars-core/src/datatypes/_serde.rs index ee5839663ddf..fd79b5bf6566 100644 --- a/crates/polars-core/src/datatypes/_serde.rs +++ b/crates/polars-core/src/datatypes/_serde.rs @@ -4,6 +4,7 @@ //! We could use [serde_1712](https://github.com/serde-rs/serde/issues/1712), but that gave problems caused by //! [rust_96956](https://github.com/rust-lang/rust/issues/96956), so we make a dummy type without static +#[cfg(feature = "dtype-categorical")] use serde::de::SeqAccess; use serde::{Deserialize, Serialize}; diff --git a/crates/polars-core/src/datatypes/time_unit.rs b/crates/polars-core/src/datatypes/time_unit.rs index 481de22249b1..d3a9a61443fb 100644 --- a/crates/polars-core/src/datatypes/time_unit.rs +++ b/crates/polars-core/src/datatypes/time_unit.rs @@ -58,7 +58,7 @@ impl TimeUnit { } } -#[cfg(feature = "rows")] +#[cfg(any(feature = "rows", feature = "object"))] #[cfg(any(feature = "dtype-datetime", feature = "dtype-duration"))] #[inline] pub(crate) fn convert_time_units(v: i64, tu_l: TimeUnit, tu_r: TimeUnit) -> i64 { diff --git a/crates/polars-core/src/frame/explode.rs b/crates/polars-core/src/frame/explode.rs index c772031f01da..906b18dcedb7 100644 --- a/crates/polars-core/src/frame/explode.rs +++ b/crates/polars-core/src/frame/explode.rs @@ -1,5 +1,7 @@ use arrow::offset::OffsetsBuffer; use rayon::prelude::*; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; use smartstring::alias::String as SmartString; use crate::chunked_array::ops::explode::offsets_to_indexes; @@ -18,6 +20,7 @@ fn get_exploded(series: &Series) -> PolarsResult<(Series, OffsetsBuffer)> { /// Arguments for `[DataFrame::unpivot]` function #[derive(Clone, Default, Debug, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct UnpivotArgsIR { pub on: Vec, pub index: Vec, diff --git a/crates/polars-core/src/frame/mod.rs b/crates/polars-core/src/frame/mod.rs index 8894e488695a..1923d370ebc1 100644 --- a/crates/polars-core/src/frame/mod.rs +++ b/crates/polars-core/src/frame/mod.rs @@ -61,6 +61,37 @@ pub enum UniqueKeepStrategy { Any, } +fn ensure_names_unique(items: &[T], mut get_name: F) -> PolarsResult<()> +where + F: FnMut(&T) -> &str, +{ + // Always unique. + if items.len() <= 1 { + return Ok(()); + } + + if items.len() <= 4 { + // Too small to be worth spawning a hashmap for, this is at most 6 comparisons. + for i in 0..items.len() - 1 { + let name = get_name(&items[i]); + for other in items.iter().skip(i + 1) { + if name == get_name(other) { + polars_bail!(duplicate = name); + } + } + } + } else { + let mut names = PlHashSet::with_capacity(items.len()); + for item in items { + let name = get_name(item); + if !names.insert(name) { + polars_bail!(duplicate = name); + } + } + } + Ok(()) +} + /// A contiguous growable collection of `Series` that have the same length. 
/// /// ## Use declarations @@ -221,89 +252,62 @@ impl DataFrame { /// let df = DataFrame::new(vec![s0, s1])?; /// # Ok::<(), PolarsError>(()) /// ``` - pub fn new(columns: Vec) -> PolarsResult { - let mut first_len = None; + pub fn new(columns: Vec) -> PolarsResult { + ensure_names_unique(&columns, |s| s.name())?; - let shape_err = |&first_name, &first_len, &name, &len| { - polars_bail!( - ShapeMismatch: "could not create a new DataFrame: series {:?} has length {} \ - while series {:?} has length {}", - first_name, first_len, name, len - ); - }; + if columns.len() > 1 { + let first_len = columns[0].len(); + for col in &columns { + polars_ensure!( + col.len() == first_len, + ShapeMismatch: "could not create a new DataFrame: series {:?} has length {} while series {:?} has length {}", + columns[0].len(), first_len, col.name(), col.len() + ); + } + } - let series_cols = if S::is_series() { - // SAFETY: - // we are guarded by the type system here. - #[allow(clippy::transmute_undefined_repr)] - let series_cols = unsafe { std::mem::transmute::, Vec>(columns) }; - let mut names = PlHashSet::with_capacity(series_cols.len()); - - for s in &series_cols { - let name = s.name(); - - match first_len { - Some(len) => { - if s.len() != len { - let first_series = &series_cols.first().unwrap(); - return shape_err( - &first_series.name(), - &first_series.len(), - &name, - &s.len(), - ); - } - }, - None => first_len = Some(s.len()), - } + Ok(DataFrame { columns }) + } - if !names.insert(name) { - polars_bail!(duplicate = name); - } - } - // we drop early as the brchk thinks the &str borrows are used when calling the drop - // of both `series_cols` and `names` - drop(names); - series_cols - } else { - let mut series_cols: Vec = Vec::with_capacity(columns.len()); - let mut names = PlHashSet::with_capacity(columns.len()); - - // check for series length equality and convert into series in one pass - for s in columns { - let series = s.into_series(); - // we have aliasing borrows so we must allocate a string - let name = series.name().to_string(); - - match first_len { - Some(len) => { - if series.len() != len { - let first_series = &series_cols.first().unwrap(); - return shape_err( - &first_series.name(), - &first_series.len(), - &name.as_str(), - &series.len(), - ); - } - }, - None => first_len = Some(series.len()), - } + /// Converts a sequence of columns into a DataFrame, broadcasting length-1 + /// columns to match the other columns. + pub fn new_with_broadcast(columns: Vec) -> PolarsResult { + ensure_names_unique(&columns, |s| s.name())?; + unsafe { Self::new_with_broadcast_no_checks(columns) } + } - if names.contains(&name) { - polars_bail!(duplicate = name); + /// Converts a sequence of columns into a DataFrame, broadcasting length-1 + /// columns to match the other columns. + /// + /// # Safety + /// Does not check that the column names are unique (which they must be). + pub unsafe fn new_with_broadcast_no_checks(mut columns: Vec) -> PolarsResult { + // The length of the longest non-unit length column determines the + // broadcast length. If all columns are unit-length the broadcast length + // is one. + let broadcast_len = columns + .iter() + .map(|s| s.len()) + .filter(|l| *l != 1) + .max() + .unwrap_or(1); + + for col in &mut columns { + // Length not equal to the broadcast len, needs broadcast or is an error. 
+ let len = col.len(); + if len != broadcast_len { + if len != 1 { + let name = col.name().to_owned(); + let longest_column = columns.iter().max_by_key(|c| c.len()).unwrap().name(); + polars_bail!( + ShapeMismatch: "could not create a new DataFrame: series {:?} has length {} while series {:?} has length {}", + name, len, longest_column, broadcast_len + ); } - - series_cols.push(series); - names.insert(name); + *col = col.new_from_index(0, broadcast_len); } - drop(names); - series_cols - }; - - Ok(DataFrame { - columns: series_cols, - }) + } + Ok(unsafe { DataFrame::new_no_checks(columns) }) } /// Creates an empty `DataFrame` usable in a compile time context (such as static initializers). @@ -442,16 +446,7 @@ impl DataFrame { /// It is the callers responsibility to uphold the contract of all `Series` /// having an equal length, if not this may panic down the line. pub unsafe fn new_no_length_checks(columns: Vec) -> PolarsResult { - let mut names = PlHashSet::with_capacity(columns.len()); - for column in &columns { - let name = column.name(); - if !names.insert(name) { - polars_bail!(duplicate = name); - } - } - // we drop early as the brchk thinks the &str borrows are used when calling the drop - // of both `columns` and `names` - drop(names); + ensure_names_unique(&columns, |s| s.name())?; Ok(DataFrame { columns }) } @@ -637,12 +632,7 @@ impl DataFrame { ShapeMismatch: "{} column names provided for a DataFrame of width {}", names.len(), self.width() ); - let unique_names: PlHashSet<&str> = - PlHashSet::from_iter(names.iter().map(|name| name.as_ref())); - polars_ensure!( - unique_names.len() == self.width(), - Duplicate: "duplicate column names found" - ); + ensure_names_unique(names, |s| s.as_ref())?; let columns = mem::take(&mut self.columns); self.columns = columns @@ -1171,8 +1161,15 @@ impl DataFrame { /// # Safety /// The caller must ensure `column.len() == self.height()` . pub unsafe fn with_column_unchecked(&mut self, column: Series) -> &mut Self { - self.get_columns_mut().push(column); - self + #[cfg(debug_assertions)] + { + return self.with_column(column).unwrap(); + } + #[cfg(not(debug_assertions))] + { + self.get_columns_mut().push(column); + self + } } fn add_column_by_schema(&mut self, s: Series, schema: &Schema) -> PolarsResult<()> { @@ -1440,7 +1437,7 @@ impl DataFrame { } pub fn _select_impl(&self, cols: &[SmartString]) -> PolarsResult { - self.select_check_duplicates(cols)?; + ensure_names_unique(cols, |s| s.as_str())?; self._select_impl_unchecked(cols) } @@ -1486,7 +1483,7 @@ impl DataFrame { check_duplicates: bool, ) -> PolarsResult { if check_duplicates { - self.select_check_duplicates(cols)?; + ensure_names_unique(cols, |s| s.as_str())?; } let selected = self.select_series_impl_with_schema(cols, schema)?; Ok(unsafe { DataFrame::new_no_checks(selected) }) @@ -1519,21 +1516,11 @@ impl DataFrame { } fn select_physical_impl(&self, cols: &[SmartString]) -> PolarsResult { - self.select_check_duplicates(cols)?; + ensure_names_unique(cols, |s| s.as_str())?; let selected = self.select_series_physical_impl(cols)?; Ok(unsafe { DataFrame::new_no_checks(selected) }) } - fn select_check_duplicates(&self, cols: &[SmartString]) -> PolarsResult<()> { - let mut names = PlHashSet::with_capacity(cols.len()); - for name in cols { - if !names.insert(name.as_str()) { - polars_bail!(duplicate = name); - } - } - Ok(()) - } - /// Select column(s) from this [`DataFrame`] and return them into a [`Vec`]. 
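The rewritten `DataFrame` constructors above funnel duplicate-name checking through `ensure_names_unique` and add broadcasting of unit-length columns via `new_from_index(index, length)` (whose argument order was swapped earlier in this diff). A hedged usage sketch with illustrative column names:

```rust
use polars_core::prelude::*;

fn main() -> PolarsResult<()> {
    let a = Series::new("a", &[1i32, 2, 3]);
    let b = Series::new("b", &[10i32]); // unit length, will be broadcast

    // Duplicate names are still rejected up front by `ensure_names_unique`.
    assert!(DataFrame::new(vec![a.clone(), a.clone()]).is_err());

    // Length-1 columns are repeated via `new_from_index(0, broadcast_len)`
    // to match the longest column.
    let df = DataFrame::new_with_broadcast(vec![a, b])?;
    assert_eq!(df.shape(), (3, 2));
    Ok(())
}
```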
/// /// # Example @@ -1705,16 +1692,16 @@ impl DataFrame { /// } /// ``` pub fn rename(&mut self, column: &str, name: &str) -> PolarsResult<&mut Self> { + if column == name { + return Ok(self); + } + polars_ensure!( + self.columns.iter().all(|c| c.name() != name), + Duplicate: "column rename attempted with already existing name \"{name}\"" + ); self.select_mut(column) .ok_or_else(|| polars_err!(col_not_found = column)) .map(|s| s.rename(name))?; - let unique_names: PlHashSet<&str> = - PlHashSet::from_iter(self.columns.iter().map(|s| s.name())); - polars_ensure!( - unique_names.len() == self.width(), - Duplicate: "duplicate column names found" - ); - drop(unique_names); Ok(self) } diff --git a/crates/polars-core/src/hashing/identity.rs b/crates/polars-core/src/hashing/identity.rs index 7554395ac50c..e917291f1586 100644 --- a/crates/polars-core/src/hashing/identity.rs +++ b/crates/polars-core/src/hashing/identity.rs @@ -36,6 +36,7 @@ pub type IdBuildHasher = BuildHasherDefault; #[derive(Debug)] /// Contains an idx of a row in a DataFrame and the precomputed hash of that row. +/// /// That hash still needs to be used to create another hash to be able to resize hashmaps without /// accidental quadratic behavior. So do not use an Identity function! pub struct IdxHash { diff --git a/crates/polars-core/src/hashing/mod.rs b/crates/polars-core/src/hashing/mod.rs index 1ac43c2888bf..8f966eb2f317 100644 --- a/crates/polars-core/src/hashing/mod.rs +++ b/crates/polars-core/src/hashing/mod.rs @@ -39,6 +39,7 @@ pub(crate) unsafe fn compare_df_rows(keys: &DataFrame, idx_a: usize, idx_b: usiz } /// Populate a multiple key hashmap with row indexes. +/// /// Instead of the keys (which could be very large), the row indexes are stored. /// To check if a row is equal the original DataFrame is also passed as ref. /// When a hash collision occurs the indexes are ptrs to the rows and the rows are compared diff --git a/crates/polars-core/src/schema.rs b/crates/polars-core/src/schema.rs index e6c121824893..8f04d1bb20be 100644 --- a/crates/polars-core/src/schema.rs +++ b/crates/polars-core/src/schema.rs @@ -5,6 +5,7 @@ use arrow::datatypes::ArrowSchemaRef; use indexmap::map::MutableKeys; use indexmap::IndexMap; use polars_utils::aliases::PlRandomState; +use polars_utils::itertools::Itertools; #[cfg(feature = "serde-lazy")] use serde::{Deserialize, Serialize}; use smartstring::alias::String as SmartString; @@ -66,12 +67,12 @@ where } impl Schema { - /// Create a new, empty schema + /// Create a new, empty schema. pub fn new() -> Self { Self::with_capacity(0) } - /// Create a new, empty schema with capacity + /// Create a new, empty schema with the given capacity. /// /// If you know the number of fields you have ahead of time, using this is more efficient than using /// [`new`][Self::new]. Also consider using [`Schema::from_iter`] if you have the collection of fields available @@ -87,7 +88,7 @@ impl Schema { self.inner.reserve(additional); } - /// The number of fields in the schema + /// The number of fields in the schema. #[inline] pub fn len(&self) -> usize { self.inner.len() @@ -98,7 +99,7 @@ impl Schema { self.inner.is_empty() } - /// Rename field `old` to `new`, and return the (owned) old name + /// Rename field `old` to `new`, and return the (owned) old name. /// /// If `old` is not present in the schema, the schema is not modified and `None` is returned. Otherwise the schema /// is updated and `Some(old_name)` is returned. 
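Returning to the `DataFrame::rename` change above: a same-name rename now short-circuits, and a clash with another existing column name is rejected before anything is mutated. A small sketch of the resulting behaviour:

```rust
use polars_core::prelude::*;

fn main() -> PolarsResult<()> {
    let mut df = DataFrame::new(vec![
        Series::new("a", &[1i32, 2]),
        Series::new("b", &[3i32, 4]),
    ])?;

    // Renaming a column to its current name is now an early no-op.
    df.rename("a", "a")?;

    // Renaming onto an existing *other* column is rejected before the
    // source column is touched, so the frame is left unchanged.
    assert!(df.rename("a", "b").is_err());
    assert_eq!(df.get_column_names(), &["a", "b"]);
    Ok(())
}
```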
@@ -114,7 +115,7 @@ impl Schema { Some(old_name) } - /// Create a new schema from this one, inserting a field with `name` and `dtype` at the given `index` + /// Create a new schema from this one, inserting a field with `name` and `dtype` at the given `index`. /// /// If a field named `name` already exists, it is updated with the new dtype. Regardless, the field named `name` is /// always moved to the given index. Valid indices range from `0` (front of the schema) to `self.len()` (after the @@ -150,7 +151,7 @@ impl Schema { Ok(new) } - /// Insert a field with `name` and `dtype` at the given `index` into this schema + /// Insert a field with `name` and `dtype` at the given `index` into this schema. /// /// If a field named `name` already exists, it is updated with the new dtype. Regardless, the field named `name` is /// always moved to the given index. Valid indices range from `0` (front of the schema) to `self.len()` (after the @@ -189,32 +190,32 @@ impl Schema { Ok(old_dtype) } - /// Get a reference to the dtype of the field named `name`, or `None` if the field doesn't exist + /// Get a reference to the dtype of the field named `name`, or `None` if the field doesn't exist. pub fn get(&self, name: &str) -> Option<&DataType> { self.inner.get(name) } - /// Get a reference to the dtype of the field named `name`, or `Err(PolarsErr)` if the field doesn't exist + /// Get a reference to the dtype of the field named `name`, or `Err(PolarsErr)` if the field doesn't exist. pub fn try_get(&self, name: &str) -> PolarsResult<&DataType> { self.get(name) .ok_or_else(|| polars_err!(SchemaFieldNotFound: "{}", name)) } - /// Get a mutable reference to the dtype of the field named `name`, or `Err(PolarsErr)` if the field doesn't exist + /// Get a mutable reference to the dtype of the field named `name`, or `Err(PolarsErr)` if the field doesn't exist. pub fn try_get_mut(&mut self, name: &str) -> PolarsResult<&mut DataType> { self.inner .get_mut(name) .ok_or_else(|| polars_err!(SchemaFieldNotFound: "{}", name)) } - /// Return all data about the field named `name`: its index in the schema, its name, and its dtype + /// Return all data about the field named `name`: its index in the schema, its name, and its dtype. /// /// Returns `Some((index, &name, &dtype))` if the field exists, `None` if it doesn't. pub fn get_full(&self, name: &str) -> Option<(usize, &SmartString, &DataType)> { self.inner.get_full(name) } - /// Return all data about the field named `name`: its index in the schema, its name, and its dtype + /// Return all data about the field named `name`: its index in the schema, its name, and its dtype. /// /// Returns `Ok((index, &name, &dtype))` if the field exists, `Err(PolarsErr)` if it doesn't. pub fn try_get_full(&self, name: &str) -> PolarsResult<(usize, &SmartString, &DataType)> { @@ -223,7 +224,7 @@ impl Schema { .ok_or_else(|| polars_err!(SchemaFieldNotFound: "{}", name)) } - /// Look up the name in the schema and return an owned [`Field`] by cloning the data + /// Look up the name in the schema and return an owned [`Field`] by cloning the data. /// /// Returns `None` if the field does not exist. /// @@ -235,7 +236,7 @@ impl Schema { .map(|dtype| Field::new(name, dtype.clone())) } - /// Look up the name in the schema and return an owned [`Field`] by cloning the data + /// Look up the name in the schema and return an owned [`Field`] by cloning the data. /// /// Returns `Err(PolarsErr)` if the field does not exist. 
/// @@ -248,7 +249,7 @@ impl Schema { .map(|dtype| Field::new(name, dtype.clone())) } - /// Get references to the name and dtype of the field at `index` + /// Get references to the name and dtype of the field at `index`. /// /// If `index` is inbounds, returns `Some((&name, &dtype))`, else `None`. See /// [`get_at_index_mut`][Self::get_at_index_mut] for a mutable version. @@ -260,7 +261,7 @@ impl Schema { self.inner.get_index(index).ok_or_else(|| polars_err!(ComputeError: "index {index} out of bounds with 'schema' of len: {}", self.len())) } - /// Get mutable references to the name and dtype of the field at `index` + /// Get mutable references to the name and dtype of the field at `index`. /// /// If `index` is inbounds, returns `Some((&mut name, &mut dtype))`, else `None`. See /// [`get_at_index`][Self::get_at_index] for an immutable version. @@ -268,7 +269,7 @@ impl Schema { self.inner.get_index_mut2(index) } - /// Swap-remove a field by name and, if the field existed, return its dtype + /// Swap-remove a field by name and, if the field existed, return its dtype. /// /// If the field does not exist, the schema is not modified and `None` is returned. /// @@ -279,7 +280,7 @@ impl Schema { self.inner.swap_remove(name) } - /// Remove a field by name, preserving order, and, if the field existed, return its dtype + /// Remove a field by name, preserving order, and, if the field existed, return its dtype. /// /// If the field does not exist, the schema is not modified and `None` is returned. /// @@ -289,7 +290,7 @@ impl Schema { self.inner.shift_remove(name) } - /// Remove a field by name, preserving order, and, if the field existed, return its dtype + /// Remove a field by name, preserving order, and, if the field existed, return its dtype. /// /// If the field does not exist, the schema is not modified and `None` is returned. /// @@ -299,12 +300,12 @@ impl Schema { self.inner.shift_remove_index(index) } - /// Whether the schema contains a field named `name` + /// Whether the schema contains a field named `name`. pub fn contains(&self, name: &str) -> bool { self.get(name).is_some() } - /// Change the field named `name` to the given `dtype` and return the previous dtype + /// Change the field named `name` to the given `dtype` and return the previous dtype. /// /// If `name` doesn't already exist in the schema, the schema is not modified and `None` is returned. Otherwise /// returns `Some(old_dtype)`. @@ -316,7 +317,7 @@ impl Schema { Some(std::mem::replace(old_dtype, dtype)) } - /// Change the field at the given index to the given `dtype` and return the previous dtype + /// Change the field at the given index to the given `dtype` and return the previous dtype. /// /// If the index is out of bounds, the schema is not modified and `None` is returned. Otherwise returns /// `Some(old_dtype)`. @@ -328,7 +329,7 @@ impl Schema { Some(std::mem::replace(old_dtype, dtype)) } - /// Insert a new column in the [`Schema`] + /// Insert a new column in the [`Schema`]. /// /// If an equivalent name already exists in the schema: the name remains and /// retains in its place in the order, its corresponding value is updated @@ -344,7 +345,7 @@ impl Schema { self.inner.insert(name, dtype) } - /// Merge `other` into `self` + /// Merge `other` into `self`. /// /// Merging logic: /// - Fields that occur in `self` but not `other` are unmodified @@ -355,7 +356,7 @@ impl Schema { self.inner.extend(other.inner) } - /// Merge borrowed `other` into `self` + /// Merge borrowed `other` into `self`. 
/// /// Merging logic: /// - Fields that occur in `self` but not `other` are unmodified @@ -370,7 +371,7 @@ impl Schema { ) } - /// Convert self to `ArrowSchema` by cloning the fields + /// Convert self to `ArrowSchema` by cloning the fields. pub fn to_arrow(&self, compat_level: CompatLevel) -> ArrowSchema { let fields: Vec<_> = self .inner @@ -380,7 +381,7 @@ impl Schema { ArrowSchema::from(fields) } - /// Iterates the [`Field`]s in this schema, constructing them anew by cloning each `(&name, &dtype)` pair + /// Iterates the [`Field`]s in this schema, constructing them anew by cloning each `(&name, &dtype)` pair. /// /// Note that this clones each name and dtype in order to form an owned [`Field`]. For a clone-free version, use /// [`iter`][Self::iter], which returns `(&name, &dtype)`. @@ -390,22 +391,22 @@ impl Schema { .map(|(name, dtype)| Field::new(name, dtype.clone())) } - /// Iterates over references to the dtypes in this schema + /// Iterates over references to the dtypes in this schema. pub fn iter_dtypes(&self) -> impl '_ + ExactSizeIterator { self.inner.iter().map(|(_name, dtype)| dtype) } - /// Iterates over mut references to the dtypes in this schema + /// Iterates over mut references to the dtypes in this schema. pub fn iter_dtypes_mut(&mut self) -> impl '_ + ExactSizeIterator { self.inner.iter_mut().map(|(_name, dtype)| dtype) } - /// Iterates over references to the names in this schema + /// Iterates over references to the names in this schema. pub fn iter_names(&self) -> impl '_ + ExactSizeIterator { self.inner.iter().map(|(name, _dtype)| name) } - /// Iterates over the `(&name, &dtype)` pairs in this schema + /// Iterates over the `(&name, &dtype)` pairs in this schema. /// /// For an owned version, use [`iter_fields`][Self::iter_fields], which clones the data to iterate owned `Field`s pub fn iter(&self) -> impl Iterator + '_ { @@ -426,6 +427,27 @@ impl Schema { } Ok(changed) } + + /// Generates another schema with just the specified columns selected from this one. + pub fn select(&self, columns: I) -> PolarsResult + where + I: IntoIterator, + I::Item: AsRef, + { + Ok(Self { + inner: columns + .into_iter() + .map(|c| { + let name = c.as_ref(); + let dtype = self + .inner + .get(name) + .ok_or_else(|| polars_err!(col_not_found = name))?; + PolarsResult::Ok((SmartString::from(name), dtype.clone())) + }) + .try_collect()?, + }) + } } pub type SchemaRef = Arc; @@ -439,7 +461,7 @@ impl IntoIterator for Schema { } } -/// This trait exists to be unify the API of polars Schema and arrows Schema +/// This trait exists to be unify the API of polars Schema and arrows Schema. pub trait IndexOfSchema: Debug { /// Get the index of a column by name. 
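// Usage sketch for the `Schema::select` method added above: it builds a new
// schema containing only the requested columns, in the requested order, and
// errors if a name is missing. The function name is illustrative; assumes the
// polars-core prelude.
use polars_core::prelude::*;

fn demo_schema_select() -> PolarsResult<()> {
    let schema = Schema::from_iter([
        Field::new("a", DataType::Int64),
        Field::new("b", DataType::String),
        Field::new("c", DataType::Float64),
    ]);
    let subset = schema.select(["c", "a"])?;
    assert_eq!(subset.len(), 2);
    assert_eq!(subset.get("c"), Some(&DataType::Float64));
    Ok(())
}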
fn index_of(&self, name: &str) -> Option; diff --git a/crates/polars-core/src/serde/series.rs b/crates/polars-core/src/serde/series.rs index 006edd96d604..49e9b6d004be 100644 --- a/crates/polars-core/src/serde/series.rs +++ b/crates/polars-core/src/serde/series.rs @@ -2,6 +2,7 @@ use std::borrow::Cow; use std::fmt::Formatter; use serde::de::{Error as DeError, MapAccess, Visitor}; +#[cfg(feature = "object")] use serde::ser::Error as SerError; use serde::{de, Deserialize, Deserializer, Serialize, Serializer}; diff --git a/crates/polars-core/src/series/implementations/struct__.rs b/crates/polars-core/src/series/implementations/struct__.rs index 767eec146195..07b35502dd6b 100644 --- a/crates/polars-core/src/series/implementations/struct__.rs +++ b/crates/polars-core/src/series/implementations/struct__.rs @@ -149,7 +149,7 @@ impl SeriesTrait for SeriesWrap { } fn new_from_index(&self, _index: usize, _length: usize) -> Series { - self.0.new_from_index(_length, _index).into_series() + self.0.new_from_index(_index, _length).into_series() } fn cast(&self, dtype: &DataType, cast_options: CastOptions) -> PolarsResult { diff --git a/crates/polars-core/src/testing.rs b/crates/polars-core/src/testing.rs index cb9f6e5389ab..91d6b998c671 100644 --- a/crates/polars-core/src/testing.rs +++ b/crates/polars-core/src/testing.rs @@ -162,7 +162,9 @@ impl PartialEq for DataFrame { } /// Asserts that two expressions of type [`DataFrame`] are equal according to [`DataFrame::equals`] -/// at runtime. If the expression are not equal, the program will panic with a message that displays +/// at runtime. +/// +/// If the expression are not equal, the program will panic with a message that displays /// both dataframes. #[macro_export] macro_rules! assert_df_eq { diff --git a/crates/polars-core/src/utils/mod.rs b/crates/polars-core/src/utils/mod.rs index d1a48a981cf8..4a078ae0f2c6 100644 --- a/crates/polars-core/src/utils/mod.rs +++ b/crates/polars-core/src/utils/mod.rs @@ -40,7 +40,8 @@ pub fn _set_partition_size() -> usize { POOL.current_num_threads() } -/// Just a wrapper structure. Useful for certain impl specializations +/// Just a wrapper structure which is useful for certain impl specializations. +/// /// This is for instance use to implement /// `impl FromIterator for NoNull>` /// as `Option` was already implemented: @@ -848,6 +849,16 @@ where pub(crate) fn align_chunks_binary_owned_series(left: Series, right: Series) -> (Series, Series) { match (left.chunks().len(), right.chunks().len()) { (1, 1) => (left, right), + // All chunks are equal length + (a, b) + if a == b + && left + .chunk_lengths() + .zip(right.chunk_lengths()) + .all(|(l, r)| l == r) => + { + (left, right) + }, (_, 1) => (left.rechunk(), right), (1, _) => (left, right.rechunk()), (_, _) => (left.rechunk(), right.rechunk()), @@ -864,6 +875,16 @@ where { match (left.chunks.len(), right.chunks.len()) { (1, 1) => (left, right), + // All chunks are equal length + (a, b) + if a == b + && left + .chunk_lengths() + .zip(right.chunk_lengths()) + .all(|(l, r)| l == r) => + { + (left, right) + }, (_, 1) => (left.rechunk(), right), (1, _) => (left, right.rechunk()), (_, _) => (left.rechunk(), right.rechunk()), @@ -1161,6 +1182,22 @@ pub fn coalesce_nulls_series(a: &Series, b: &Series) -> (Series, Series) { } } +pub fn operation_exceeded_idxsize_msg(operation: &str) -> String { + if core::mem::size_of::() == core::mem::size_of::() { + format!( + "{} exceeded the maximum supported limit of {} rows. 
Consider installing 'polars-u64-idx'.", + operation, + IdxSize::MAX, + ) + } else { + format!( + "{} exceeded the maximum supported limit of {} rows.", + operation, + IdxSize::MAX, + ) + } +} + #[cfg(test)] mod test { use super::*; diff --git a/crates/polars-error/src/lib.rs b/crates/polars-error/src/lib.rs index e0989141d2b6..f664c1acf3f3 100644 --- a/crates/polars-error/src/lib.rs +++ b/crates/polars-error/src/lib.rs @@ -6,11 +6,27 @@ use std::collections::TryReserveError; use std::error::Error; use std::fmt::{self, Display, Formatter, Write}; use std::ops::Deref; -use std::sync::Arc; +use std::sync::{Arc, LazyLock}; use std::{env, io}; pub use warning::*; +enum ErrorStrategy { + Panic, + WithBacktrace, + Normal, +} + +static ERROR_STRATEGY: LazyLock = LazyLock::new(|| { + if env::var("POLARS_PANIC_ON_ERR").as_deref() == Ok("1") { + ErrorStrategy::Panic + } else if env::var("POLARS_BACKTRACE_IN_ERR").as_deref() == Ok("1") { + ErrorStrategy::WithBacktrace + } else { + ErrorStrategy::Normal + } +}); + #[derive(Debug)] pub struct ErrString(Cow<'static, str>); @@ -25,10 +41,14 @@ where T: Into>, { fn from(msg: T) -> Self { - if env::var("POLARS_PANIC_ON_ERR").as_deref().unwrap_or("") == "1" { - panic!("{}", msg.into()) - } else { - ErrString(msg.into()) + match &*ERROR_STRATEGY { + ErrorStrategy::Panic => panic!("{}", msg.into()), + ErrorStrategy::WithBacktrace => ErrString(Cow::Owned(format!( + "{}\n\nRust backtrace:\n{}", + msg.into(), + std::backtrace::Backtrace::force_capture() + ))), + ErrorStrategy::Normal => ErrString(msg.into()), } } } @@ -184,7 +204,7 @@ impl PolarsError { } } - fn wrap_msg String>(&self, func: F) -> Self { + pub fn wrap_msg String>(&self, func: F) -> Self { use PolarsError::*; match self { ColumnNotFound(msg) => ColumnNotFound(func(msg).into()), diff --git a/crates/polars-expr/src/expressions/apply.rs b/crates/polars-expr/src/expressions/apply.rs index 4d13d784540e..802e130d15f2 100644 --- a/crates/polars-expr/src/expressions/apply.rs +++ b/crates/polars-expr/src/expressions/apply.rs @@ -240,10 +240,7 @@ impl ApplyExpr { // then unpack the lists and finally create iterators from this list chunked arrays. let mut iters = acs .iter_mut() - .map(|ac| { - // SAFETY: unstable series never lives longer than the iterator. - unsafe { ac.iter_groups(self.pass_name_to_apply) } - }) + .map(|ac| ac.iter_groups(self.pass_name_to_apply)) .collect::>(); // Length of the items to iterate over. diff --git a/crates/polars-expr/src/expressions/binary.rs b/crates/polars-expr/src/expressions/binary.rs index ce26c1a57c77..55caf00ad69a 100644 --- a/crates/polars-expr/src/expressions/binary.rs +++ b/crates/polars-expr/src/expressions/binary.rs @@ -151,15 +151,13 @@ impl BinaryExpr { mut ac_r: AggregationContext<'a>, ) -> PolarsResult> { let name = ac_l.series().name().to_string(); - // SAFETY: unstable series never lives longer than the iterator. - let ca = unsafe { - ac_l.iter_groups(false) - .zip(ac_r.iter_groups(false)) - .map(|(l, r)| Some(apply_operator(l?.as_ref(), r?.as_ref(), self.op))) - .map(|opt_res| opt_res.transpose()) - .collect::>()? - .with_name(&name) - }; + let ca = ac_l + .iter_groups(false) + .zip(ac_r.iter_groups(false)) + .map(|(l, r)| Some(apply_operator(l?.as_ref(), r?.as_ref(), self.op))) + .map(|opt_res| opt_res.transpose()) + .collect::>()? 
+ .with_name(&name); ac_l.with_update_groups(UpdateGroups::WithSeriesLen); ac_l.with_agg_state(AggState::AggregatedList(ca.into_series())); diff --git a/crates/polars-expr/src/expressions/filter.rs b/crates/polars-expr/src/expressions/filter.rs index d9df88419ae7..db9ee0cf120e 100644 --- a/crates/polars-expr/src/expressions/filter.rs +++ b/crates/polars-expr/src/expressions/filter.rs @@ -45,10 +45,15 @@ impl PhysicalExpr for FilterExpr { let (ac_s, ac_predicate) = POOL.install(|| rayon::join(ac_s_f, ac_predicate_f)); let (mut ac_s, mut ac_predicate) = (ac_s?, ac_predicate?); + // Check if the groups are still equal, otherwise aggregate. + // TODO! create a special group iters that don't materialize + if ac_s.groups.as_ref() as *const _ != ac_predicate.groups.as_ref() as *const _ { + let _ = ac_s.aggregated(); + let _ = ac_predicate.aggregated(); + } if ac_predicate.is_aggregated() || ac_s.is_aggregated() { - // SAFETY: unstable series never lives longer than the iterator. - let preds = unsafe { ac_predicate.iter_groups(false) }; + let preds = ac_predicate.iter_groups(false); let s = ac_s.aggregated(); let ca = s.list()?; let out = if ca.is_empty() { diff --git a/crates/polars-expr/src/expressions/gather.rs b/crates/polars-expr/src/expressions/gather.rs index c54f8b9e8262..951833717a33 100644 --- a/crates/polars-expr/src/expressions/gather.rs +++ b/crates/polars-expr/src/expressions/gather.rs @@ -253,21 +253,19 @@ impl GatherExpr { ac.series().name(), )?; - unsafe { - let iter = ac.iter_groups(false).zip(idx.iter_groups(false)); - for (s, idx) in iter { - match (s, idx) { - (Some(s), Some(idx)) => { - let idx = convert_to_unsigned_index(idx.as_ref(), s.as_ref().len())?; - let out = s.as_ref().take(&idx)?; - builder.append_series(&out)?; - }, - _ => builder.append_null(), - }; - } - let out = builder.finish().into_series(); - ac.with_agg_state(AggState::AggregatedList(out)); + let iter = ac.iter_groups(false).zip(idx.iter_groups(false)); + for (s, idx) in iter { + match (s, idx) { + (Some(s), Some(idx)) => { + let idx = convert_to_unsigned_index(idx.as_ref(), s.as_ref().len())?; + let out = s.as_ref().take(&idx)?; + builder.append_series(&out)?; + }, + _ => builder.append_null(), + }; } + let out = builder.finish().into_series(); + ac.with_agg_state(AggState::AggregatedList(out)); Ok(ac) } } diff --git a/crates/polars-expr/src/expressions/group_iter.rs b/crates/polars-expr/src/expressions/group_iter.rs index 8c921a519bd1..26c68fdae3d2 100644 --- a/crates/polars-expr/src/expressions/group_iter.rs +++ b/crates/polars-expr/src/expressions/group_iter.rs @@ -5,10 +5,7 @@ use polars_core::series::amortized_iter::AmortSeries; use super::*; impl<'a> AggregationContext<'a> { - /// # Safety - /// The lifetime of [AmortSeries] is bound to the iterator. Keeping it alive - /// longer than the iterator is UB. 
- pub(super) unsafe fn iter_groups( + pub(super) fn iter_groups( &mut self, keep_names: bool, ) -> Box> + '_> { diff --git a/crates/polars-expr/src/expressions/mod.rs b/crates/polars-expr/src/expressions/mod.rs index 17179f89cbdd..266d577b22ee 100644 --- a/crates/polars-expr/src/expressions/mod.rs +++ b/crates/polars-expr/src/expressions/mod.rs @@ -421,7 +421,9 @@ impl<'a> AggregationContext<'a> { self.groups(); let rows = self.groups.len(); let s = s.new_from_index(0, rows); - s.reshape_list(&[rows as i64, -1]).unwrap() + let out = s.reshape_list(&[rows as i64, -1]).unwrap(); + self.state = AggState::AggregatedList(out.clone()); + out }, } } diff --git a/crates/polars-expr/src/expressions/ternary.rs b/crates/polars-expr/src/expressions/ternary.rs index b84e868efd35..e3c2f9e833a2 100644 --- a/crates/polars-expr/src/expressions/ternary.rs +++ b/crates/polars-expr/src/expressions/ternary.rs @@ -37,26 +37,23 @@ fn finish_as_iters<'a>( mut ac_falsy: AggregationContext<'a>, mut ac_mask: AggregationContext<'a>, ) -> PolarsResult> { - // SAFETY: unstable series never lives longer than the iterator. - let ca = unsafe { - ac_truthy - .iter_groups(false) - .zip(ac_falsy.iter_groups(false)) - .zip(ac_mask.iter_groups(false)) - .map(|((truthy, falsy), mask)| { - match (truthy, falsy, mask) { - (Some(truthy), Some(falsy), Some(mask)) => Some( - truthy - .as_ref() - .zip_with(mask.as_ref().bool()?, falsy.as_ref()), - ), - _ => None, - } - .transpose() - }) - .collect::>()? - .with_name(ac_truthy.series().name()) - }; + let ca = ac_truthy + .iter_groups(false) + .zip(ac_falsy.iter_groups(false)) + .zip(ac_mask.iter_groups(false)) + .map(|((truthy, falsy), mask)| { + match (truthy, falsy, mask) { + (Some(truthy), Some(falsy), Some(mask)) => Some( + truthy + .as_ref() + .zip_with(mask.as_ref().bool()?, falsy.as_ref()), + ), + _ => None, + } + .transpose() + }) + .collect::>()? + .with_name(ac_truthy.series().name()); // Aggregation leaves only a single chunk. 
let arr = ca.downcast_iter().next().unwrap(); diff --git a/crates/polars-io/Cargo.toml b/crates/polars-io/Cargo.toml index ef569e61519f..4a5fd97bf689 100644 --- a/crates/polars-io/Cargo.toml +++ b/crates/polars-io/Cargo.toml @@ -28,6 +28,7 @@ fast-float = { workspace = true, optional = true } flate2 = { workspace = true, optional = true } futures = { workspace = true, optional = true } glob = { version = "0.3" } +hashbrown = { workspace = true } itoa = { workspace = true, optional = true } memchr = { workspace = true } memmap = { workspace = true } @@ -40,7 +41,7 @@ regex = { workspace = true } reqwest = { workspace = true, optional = true } ryu = { workspace = true, optional = true } serde = { workspace = true, features = ["rc"], optional = true } -serde_json = { version = "1", default-features = false, features = ["alloc"], optional = true } +serde_json = { version = "1", optional = true } simd-json = { workspace = true, optional = true } simdutf8 = { workspace = true, optional = true } smartstring = { workspace = true } @@ -100,7 +101,7 @@ dtype-struct = ["polars-core/dtype-struct"] dtype-decimal = ["polars-core/dtype-decimal", "polars-json?/dtype-decimal"] fmt = ["polars-core/fmt"] lazy = [] -parquet = ["polars-parquet", "polars-parquet/compression"] +parquet = ["polars-parquet", "polars-parquet/compression", "polars-core/partition_by"] async = [ "async-trait", "futures", @@ -121,12 +122,11 @@ cloud = [ "reqwest", "http", ] -file_cache = ["async", "dep:blake3", "dep:fs4"] +file_cache = ["async", "dep:blake3", "dep:fs4", "serde_json", "cloud"] aws = ["object_store/aws", "cloud", "reqwest"] azure = ["object_store/azure", "cloud"] gcp = ["object_store/gcp", "cloud"] http = ["object_store/http", "cloud"] -partition = ["polars-core/partition_by"] temporal = ["dtype-datetime", "dtype-date", "dtype-time"] simd = [] python = ["polars-error/python"] diff --git a/crates/polars-io/src/cloud/adaptors.rs b/crates/polars-io/src/cloud/adaptors.rs index 435d703f2d80..5e034b55a80c 100644 --- a/crates/polars-io/src/cloud/adaptors.rs +++ b/crates/polars-io/src/cloud/adaptors.rs @@ -11,11 +11,13 @@ use tokio::io::AsyncWriteExt; use super::CloudOptions; use crate::pl_async::get_runtime; -/// Adaptor which wraps the interface of [ObjectStore::BufWriter](https://docs.rs/object_store/latest/object_store/buffered/struct.BufWriter.html) -/// exposing a synchronous interface which implements `std::io::Write`. +/// Adaptor which wraps the interface of [ObjectStore::BufWriter] exposing a synchronous interface +/// which implements `std::io::Write`. /// /// This allows it to be used in sync code which would otherwise write to a simple File or byte stream, /// such as with `polars::prelude::CsvWriter`. 
+/// +/// [ObjectStore::BufWriter]: https://docs.rs/object_store/latest/object_store/buffered/struct.BufWriter.html pub struct CloudWriter { // Internal writer, constructed at creation writer: BufWriter, diff --git a/crates/polars-io/src/cloud/options.rs b/crates/polars-io/src/cloud/options.rs index de0968a80da0..ca9016d05a96 100644 --- a/crates/polars-io/src/cloud/options.rs +++ b/crates/polars-io/src/cloud/options.rs @@ -277,7 +277,7 @@ impl CloudOptions { &mut builder, &[( Path::new("~/.aws/config"), - &[("region = (.*)\n", AmazonS3ConfigKey::Region)], + &[("region\\s*=\\s*(.*)\n", AmazonS3ConfigKey::Region)], )], ); read_config( @@ -285,9 +285,12 @@ impl CloudOptions { &[( Path::new("~/.aws/credentials"), &[ - ("aws_access_key_id = (.*)\n", AmazonS3ConfigKey::AccessKeyId), ( - "aws_secret_access_key = (.*)\n", + "aws_access_key_id\\s*=\\s*(.*)\n", + AmazonS3ConfigKey::AccessKeyId, + ), + ( + "aws_secret_access_key\\s*=\\s*(.*)\n", AmazonS3ConfigKey::SecretAccessKey, ), ], diff --git a/crates/polars-io/src/cloud/polars_object_store.rs b/crates/polars-io/src/cloud/polars_object_store.rs index f2744432bfa0..cd72d568f2fb 100644 --- a/crates/polars-io/src/cloud/polars_object_store.rs +++ b/crates/polars-io/src/cloud/polars_object_store.rs @@ -16,6 +16,7 @@ use crate::pl_async::{ /// concurrent requests for the entire application. #[derive(Debug, Clone)] pub struct PolarsObjectStore(Arc); +pub type ObjectStorePath = object_store::path::Path; impl PolarsObjectStore { pub fn new(store: Arc) -> Self { @@ -82,8 +83,31 @@ impl PolarsObjectStore { /// Fetch the metadata of the parquet file, do not memoize it. pub async fn head(&self, path: &Path) -> PolarsResult { - with_concurrency_budget(1, || self.0.head(path)) - .await - .map_err(to_compute_err) + with_concurrency_budget(1, || async { + let head_result = self.0.head(path).await; + + if head_result.is_err() { + // Pre-signed URLs forbid the HEAD method, but we can still retrieve the header + // information with a range 0-0 request. 
+ let get_range_0_0_result = self + .0 + .get_opts( + path, + object_store::GetOptions { + range: Some((0..1).into()), + ..Default::default() + }, + ) + .await; + + if let Ok(v) = get_range_0_0_result { + return Ok(v.meta); + } + } + + head_result + }) + .await + .map_err(to_compute_err) } } diff --git a/crates/polars-io/src/csv/read/schema_inference.rs b/crates/polars-io/src/csv/read/schema_inference.rs index 189c54501c12..bdbd8296f7fe 100644 --- a/crates/polars-io/src/csv/read/schema_inference.rs +++ b/crates/polars-io/src/csv/read/schema_inference.rs @@ -502,7 +502,7 @@ fn infer_file_schema_inner( pub(super) fn check_decimal_comma(decimal_comma: bool, separator: u8) -> PolarsResult<()> { if decimal_comma { - polars_ensure!(b',' != separator, InvalidOperation: "'decimal_comma' argument cannot be combined with ',' quote char") + polars_ensure!(b',' != separator, InvalidOperation: "'decimal_comma' argument cannot be combined with ',' separator") } Ok(()) } diff --git a/crates/polars-io/src/json/mod.rs b/crates/polars-io/src/json/mod.rs index 99dbd53ffa5d..f4158abe69e7 100644 --- a/crates/polars-io/src/json/mod.rs +++ b/crates/polars-io/src/json/mod.rs @@ -71,6 +71,7 @@ use std::ops::Deref; use arrow::legacy::conversion::chunk_to_struct; use polars_core::error::to_compute_err; use polars_core::prelude::*; +use polars_error::{polars_bail, PolarsResult}; use polars_json::json::write::FallibleStreamingIterator; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; @@ -86,9 +87,11 @@ pub struct JsonWriterOptions { pub maintain_order: bool, } -/// The format to use to write the DataFrame to JSON: `Json` (a JSON array) or `JsonLines` (each row output on a -/// separate line). In either case, each row is serialized as a JSON object whose keys are the column names and whose -/// values are the row's corresponding values. +/// The format to use to write the DataFrame to JSON: `Json` (a JSON array) +/// or `JsonLines` (each row output on a separate line). +/// +/// In either case, each row is serialized as a JSON object whose keys are the column names and +/// whose values are the row's corresponding values. pub enum JsonFormat { /// A single JSON array containing each DataFrame row as an object. The length of the array is the number of rows in /// the DataFrame. @@ -222,6 +225,17 @@ where json_format: JsonFormat, } +pub fn remove_bom(bytes: &[u8]) -> PolarsResult<&[u8]> { + if bytes.starts_with(&[0xEF, 0xBB, 0xBF]) { + // UTF-8 BOM + Ok(&bytes[3..]) + } else if bytes.starts_with(&[0xFE, 0xFF]) || bytes.starts_with(&[0xFF, 0xFE]) { + // UTF-16 BOM + polars_bail!(ComputeError: "utf-16 not supported") + } else { + Ok(bytes) + } +} impl<'a, R> SerReader for JsonReader<'a, R> where R: MmapBytesReader, @@ -251,8 +265,9 @@ where /// incompatible types in the input. In the event that a column contains mixed dtypes, is it unspecified whether an /// error is returned or whether elements of incompatible dtypes are replaced with `null`. 
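// Quick illustrative check of the `remove_bom` helper added above: a UTF-8
// byte-order mark is stripped before the buffer reaches the JSON parser, while
// UTF-16 input (either endianness) is rejected with a ComputeError.
fn demo_remove_bom() {
    let with_bom = [0xEF_u8, 0xBB, 0xBF, b'{', b'}'];
    assert_eq!(remove_bom(&with_bom).unwrap(), b"{}".as_slice());

    let utf16_le = [0xFF_u8, 0xFE, b'a', 0x00];
    assert!(remove_bom(&utf16_le).is_err());
}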
fn finish(mut self) -> PolarsResult { - let rb: ReaderBytes = (&mut self.reader).into(); - + let pre_rb: ReaderBytes = (&mut self.reader).into(); + let bytes = remove_bom(pre_rb.deref())?; + let rb = ReaderBytes::Borrowed(bytes); let out = match self.json_format { JsonFormat::Json => { polars_ensure!(!self.ignore_errors, InvalidOperation: "'ignore_errors' only supported in ndjson"); diff --git a/crates/polars-io/src/lib.rs b/crates/polars-io/src/lib.rs index 5aa6e7fcebab..f3540f4e13fd 100644 --- a/crates/polars-io/src/lib.rs +++ b/crates/polars-io/src/lib.rs @@ -1,6 +1,7 @@ #![cfg_attr(docsrs, feature(doc_auto_cfg))] #![cfg_attr(feature = "simd", feature(portable_simd))] #![allow(ambiguous_glob_reexports)] +extern crate core; #[cfg(feature = "avro")] pub mod avro; diff --git a/crates/polars-io/src/ndjson/core.rs b/crates/polars-io/src/ndjson/core.rs index 3390d1004b9d..706e49c80f42 100644 --- a/crates/polars-io/src/ndjson/core.rs +++ b/crates/polars-io/src/ndjson/core.rs @@ -14,7 +14,7 @@ use crate::mmap::{MmapBytesReader, ReaderBytes}; use crate::ndjson::buffer::*; use crate::predicates::PhysicalIoExpr; use crate::prelude::*; -use crate::RowIndex; +use crate::{RowIndex, SerReader}; const NEWLINE: u8 = b'\n'; const CLOSING_BRACKET: u8 = b'}'; diff --git a/crates/polars-io/src/parquet/read/async_impl.rs b/crates/polars-io/src/parquet/read/async_impl.rs index 97e4829581bc..812011af48bf 100644 --- a/crates/polars-io/src/parquet/read/async_impl.rs +++ b/crates/polars-io/src/parquet/read/async_impl.rs @@ -18,6 +18,7 @@ use crate::cloud::{ build_object_store, object_path_from_str, CloudLocation, CloudOptions, PolarsObjectStore, }; use crate::parquet::metadata::FileMetaDataRef; +use crate::parquet::read::metadata::PartitionedColumnChunkMD; use crate::pl_async::get_runtime; use crate::predicates::PhysicalIoExpr; @@ -277,8 +278,19 @@ impl FetchRowGroupsFromObjectStore { row_group_range .filter_map(|i| { let rg = &row_groups[i]; + + // TODO! + // Optimize this. Now we partition the predicate columns twice. (later on reading as well) + // I think we must add metadata context where we can cache and amortize the partitioning. + let mut part_md = PartitionedColumnChunkMD::new(rg); + let live = pred.live_variables(); + part_md.set_partitions( + live.as_ref() + .map(|vars| vars.iter().map(|s| s.as_ref()).collect::>()) + .as_ref(), + ); let should_be_read = - matches!(read_this_row_group(Some(pred), rg, &schema), Ok(true)); + matches!(read_this_row_group(Some(pred), &part_md, &schema), Ok(true)); // Already add the row groups that will be skipped to the prefetched data. if !should_be_read { diff --git a/crates/polars-io/src/parquet/read/metadata.rs b/crates/polars-io/src/parquet/read/metadata.rs new file mode 100644 index 000000000000..8f1a2c1642c8 --- /dev/null +++ b/crates/polars-io/src/parquet/read/metadata.rs @@ -0,0 +1,57 @@ +use hashbrown::hash_map::RawEntryMut; +use polars_parquet::read::{ColumnChunkMetaData, RowGroupMetaData}; +use polars_utils::aliases::{PlHashMap, PlHashSet}; +use polars_utils::idx_vec::UnitVec; +use polars_utils::unitvec; + +/// This is a utility struct that Partitions the `ColumnChunkMetaData` by `field.name == descriptor.path_in_schema[0]` +/// This is required to fix quadratic behavior in wide parquet files. See #18319. 
+pub struct PartitionedColumnChunkMD<'a> { + partitions: Option>>, + metadata: &'a RowGroupMetaData, +} + +impl<'a> PartitionedColumnChunkMD<'a> { + pub fn new(metadata: &'a RowGroupMetaData) -> Self { + Self { + partitions: Default::default(), + metadata, + } + } + + pub(super) fn num_rows(&self) -> usize { + self.metadata.num_rows() + } + + pub fn set_partitions(&mut self, field_names: Option<&PlHashSet<&str>>) { + let mut partitions = PlHashMap::default(); + for (i, ccmd) in self.metadata.columns().iter().enumerate() { + let name = &ccmd.descriptor().path_in_schema[0]; + if field_names + .map(|field_names| field_names.contains(name.as_str())) + .unwrap_or(true) + { + let entry = partitions.raw_entry_mut().from_key(name.as_str()); + + match entry { + RawEntryMut::Vacant(slot) => { + slot.insert(name.to_string(), unitvec![i]); + }, + RawEntryMut::Occupied(mut slot) => { + slot.get_mut().push(i); + }, + }; + } + } + self.partitions = Some(partitions) + } + + pub fn get_partitions(&self, name: &str) -> Option> { + let columns = self.metadata.columns(); + self.partitions + .as_ref() + .expect("fields should be partitioned first") + .get(name) + .map(|idx| idx.iter().map(|i| &columns[*i]).collect::>()) + } +} diff --git a/crates/polars-io/src/parquet/read/mmap.rs b/crates/polars-io/src/parquet/read/mmap.rs index 4489247e1a6f..84725fd7a2e1 100644 --- a/crates/polars-io/src/parquet/read/mmap.rs +++ b/crates/polars-io/src/parquet/read/mmap.rs @@ -6,8 +6,7 @@ use bytes::Bytes; use polars_core::datatypes::PlHashMap; use polars_error::PolarsResult; use polars_parquet::read::{ - column_iter_to_arrays, get_field_columns, BasicDecompressor, ColumnChunkMetaData, Filter, - PageReader, + column_iter_to_arrays, BasicDecompressor, ColumnChunkMetaData, Filter, PageReader, }; use polars_utils::mmap::{MemReader, MemSlice}; @@ -32,11 +31,10 @@ pub enum ColumnStore { /// For cloud files the relevant memory regions should have been prefetched. pub(super) fn mmap_columns<'a>( store: &'a ColumnStore, - columns: &'a [ColumnChunkMetaData], - field_name: &str, + field_columns: &'a [&ColumnChunkMetaData], ) -> Vec<(&'a ColumnChunkMetaData, MemSlice)> { - get_field_columns(columns, field_name) - .into_iter() + field_columns + .iter() .map(|meta| _mmap_single_column(store, meta)) .collect() } @@ -63,7 +61,7 @@ fn _mmap_single_column<'a>( // similar to arrow2 serializer, except this accepts a slice instead of a vec. 
// this allows us to memory map -pub(super) fn to_deserializer( +pub fn to_deserializer( columns: Vec<(&ColumnChunkMetaData, MemSlice)>, field: Field, filter: Option, diff --git a/crates/polars-io/src/parquet/read/mod.rs b/crates/polars-io/src/parquet/read/mod.rs index 9b965172c375..b6b337c3ff6e 100644 --- a/crates/polars-io/src/parquet/read/mod.rs +++ b/crates/polars-io/src/parquet/read/mod.rs @@ -16,6 +16,7 @@ #[cfg(feature = "cloud")] mod async_impl; +mod metadata; mod mmap; mod options; mod predicates; @@ -37,3 +38,9 @@ use polars_error::{ErrString, PolarsError}; pub use reader::ParquetAsyncReader; pub use reader::{BatchedParquetReader, ParquetReader}; pub use utils::materialize_empty_df; + +pub mod _internal { + pub use super::metadata::PartitionedColumnChunkMD; + pub use super::mmap::to_deserializer; + pub use super::predicates::read_this_row_group; +} diff --git a/crates/polars-io/src/parquet/read/predicates.rs b/crates/polars-io/src/parquet/read/predicates.rs index d3775864e1a3..565ef53f4edd 100644 --- a/crates/polars-io/src/parquet/read/predicates.rs +++ b/crates/polars-io/src/parquet/read/predicates.rs @@ -1,8 +1,7 @@ -use arrow::datatypes::ArrowSchemaRef; use polars_core::prelude::*; use polars_parquet::read::statistics::{deserialize, Statistics}; -use polars_parquet::read::RowGroupMetaData; +use crate::parquet::read::metadata::PartitionedColumnChunkMD; use crate::predicates::{BatchStats, ColumnStats, PhysicalIoExpr}; impl ColumnStats { @@ -16,37 +15,43 @@ impl ColumnStats { } } -/// Collect the statistics in a column chunk. +/// Collect the statistics in a row-group pub(crate) fn collect_statistics( - md: &RowGroupMetaData, + part_md: &PartitionedColumnChunkMD, schema: &ArrowSchema, ) -> PolarsResult> { - let mut stats = vec![]; + // TODO! fix this performance. This is a full sequential scan. + let stats = schema + .fields + .iter() + .map(|field| match part_md.get_partitions(&field.name) { + Some(md) => { + let st = deserialize(field, &md)?; + Ok(ColumnStats::from_arrow_stats(st, field)) + }, + None => Ok(ColumnStats::new(field.into(), None, None, None)), + }) + .collect::>>()?; - for field in schema.fields.iter() { - let st = deserialize(field, md)?; - stats.push(ColumnStats::from_arrow_stats(st, field)); + if stats.is_empty() { + return Ok(None); } - Ok(if stats.is_empty() { - None - } else { - Some(BatchStats::new( - Arc::new(schema.into()), - stats, - Some(md.num_rows()), - )) - }) + Ok(Some(BatchStats::new( + Arc::new(schema.into()), + stats, + Some(part_md.num_rows()), + ))) } -pub(super) fn read_this_row_group( +pub fn read_this_row_group( predicate: Option<&dyn PhysicalIoExpr>, - md: &RowGroupMetaData, - schema: &ArrowSchemaRef, + part_md: &PartitionedColumnChunkMD, + schema: &ArrowSchema, ) -> PolarsResult { if let Some(pred) = predicate { if let Some(pred) = pred.as_stats_evaluator() { - if let Some(stats) = collect_statistics(md, schema)? { + if let Some(stats) = collect_statistics(part_md, schema)? 
{ let should_read = pred.should_read(&stats); // a parquet file may not have statistics of all columns if matches!(should_read, Ok(false)) { diff --git a/crates/polars-io/src/parquet/read/read_impl.rs b/crates/polars-io/src/parquet/read/read_impl.rs index 15d35fcd285b..d0b1845cc8c5 100644 --- a/crates/polars-io/src/parquet/read/read_impl.rs +++ b/crates/polars-io/src/parquet/read/read_impl.rs @@ -9,7 +9,9 @@ use polars_core::utils::{accumulate_dataframes_vertical, split_df}; use polars_core::POOL; use polars_parquet::parquet::error::ParquetResult; use polars_parquet::parquet::statistics::Statistics; -use polars_parquet::read::{self, FileMetaData, Filter, PhysicalType, RowGroupMetaData}; +use polars_parquet::read::{ + self, ColumnChunkMetaData, FileMetaData, Filter, PhysicalType, RowGroupMetaData, +}; use polars_utils::mmap::MemSlice; use polars_utils::vec::inplace_zip_filtermap; use rayon::prelude::*; @@ -24,6 +26,7 @@ use super::{mmap, ParallelStrategy}; use crate::hive::materialize_hive_partitions; use crate::mmap::{MmapBytesReader, ReaderBytes}; use crate::parquet::metadata::FileMetaDataRef; +use crate::parquet::read::metadata::PartitionedColumnChunkMD; use crate::parquet::read::ROW_COUNT_OVERFLOW_ERR; use crate::predicates::{apply_predicate, PhysicalIoExpr}; use crate::utils::get_reader_bytes; @@ -58,7 +61,8 @@ fn assert_dtypes(data_type: &ArrowDataType) { fn column_idx_to_series( column_i: usize, - md: &RowGroupMetaData, + // The metadata belonging to this column + field_md: &[&ColumnChunkMetaData], filter: Option, file_schema: &ArrowSchema, store: &mmap::ColumnStore, @@ -69,8 +73,7 @@ fn column_idx_to_series( { assert_dtypes(field.data_type()) } - - let columns = mmap_columns(store, md.columns(), &field.name); + let columns = mmap_columns(store, field_md); let stats = columns .iter() .map(|(col_md, _)| col_md.statistics().transpose()) @@ -203,6 +206,24 @@ fn rg_to_dfs( } } +/// Collect a HashSet of the projected columns. +/// Returns `None` if all columns are projected. 
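// A condensed sketch of the lookup pattern the readers below rely on: partition a
// row group's column-chunk metadata once for the projected fields, then fetch the
// chunks of a single field by name instead of re-scanning every column chunk
// (the quadratic behaviour described in #18319). Assumes the imports used by the
// new metadata module; the function name is illustrative.
fn count_field_chunks(
    rg: &RowGroupMetaData,
    projected: Option<&PlHashSet<&str>>,
    field_name: &str,
) -> usize {
    let mut part_md = PartitionedColumnChunkMD::new(rg);
    part_md.set_partitions(projected);
    part_md
        .get_partitions(field_name)
        .map(|chunks| chunks.len())
        .unwrap_or(0)
}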
+fn projected_columns_set<'a>( + schema: &'a ArrowSchema, + projection: &[usize], +) -> Option> { + if projection.len() == schema.len() { + None + } else { + Some( + projection + .iter() + .map(|i| schema.fields[*i].name.as_str()) + .collect::>(), + ) + } +} + #[allow(clippy::too_many_arguments)] fn rg_to_dfs_prefiltered( store: &mmap::ColumnStore, @@ -227,9 +248,24 @@ fn rg_to_dfs_prefiltered( polars_bail!(ComputeError: "Parquet file contains too many row groups (> {})", u32::MAX); } + let projected_columns = projected_columns_set(schema, projection); + + let part_mds = POOL.install(|| { + file_metadata + .row_groups + .par_iter() + .map(|rg| { + let mut part_md = PartitionedColumnChunkMD::new(rg); + part_md.set_partitions(projected_columns.as_ref()); + part_md + }) + .collect::>() + }); + let mut row_offset = *previous_row_count; let mut row_groups: Vec = (row_group_start..row_group_end) .filter_map(|index| { + let part_md = &part_mds[index]; let md = &file_metadata.row_groups[index]; let current_offset = row_offset; @@ -237,8 +273,7 @@ fn rg_to_dfs_prefiltered( row_offset += current_row_count; if use_statistics { - match read_this_row_group(Some(predicate), &file_metadata.row_groups[index], schema) - { + match read_this_row_group(Some(predicate), part_md, schema) { Ok(false) => return None, Ok(true) => {}, Err(e) => return Some(Err(e)), @@ -252,38 +287,46 @@ fn rg_to_dfs_prefiltered( }) .collect::>>()?; - let num_live_columns = live_variables.len(); - let num_dead_columns = projection.len() - num_live_columns; - + // Deduplicate the live variables let live_variables = live_variables .iter() .map(Deref::deref) .collect::>(); + // Get the number of live columns + let num_live_columns = live_variables.len(); + let num_dead_columns = projection.len() - num_live_columns; + // We create two look-up tables that map indexes offsets into the live- and dead-set onto // column indexes of the schema. let mut live_idx_to_col_idx = Vec::with_capacity(num_live_columns); let mut dead_idx_to_col_idx = Vec::with_capacity(num_dead_columns); - for (i, col) in file_metadata.schema().columns().iter().enumerate() { - if live_variables.contains(col.path_in_schema[0].deref()) { + for (i, field) in schema.fields.iter().enumerate() { + if live_variables.contains(&field.name[..]) { live_idx_to_col_idx.push(i); } else { dead_idx_to_col_idx.push(i); } } - debug_assert_eq!(live_variables.len(), num_live_columns); + + debug_assert_eq!(live_idx_to_col_idx.len(), num_live_columns); debug_assert_eq!(dead_idx_to_col_idx.len(), num_dead_columns); POOL.install(|| { + // Set partitioned fields to prevent quadratic behavior. + // Ensure all row groups are partitioned. 
+ // Collect the data for the live columns let mut live_columns = (0..row_groups.len() * num_live_columns) .into_par_iter() .map(|i| { let col_idx = live_idx_to_col_idx[i % num_live_columns]; - let rg_idx = row_groups[i / num_live_columns].index as usize; - let md = &file_metadata.row_groups[rg_idx]; - column_idx_to_series(col_idx, md, None, schema, store) + let name = &schema.fields[col_idx].name; + let rg_idx = row_groups[i / num_live_columns].index; + let field_md = part_mds[rg_idx as usize].get_partitions(name).unwrap(); + + column_idx_to_series(col_idx, field_md.as_slice(), None, schema, store) }) .collect::>>()?; @@ -316,8 +359,12 @@ fn rg_to_dfs_prefiltered( let mut bitmap = MutableBitmap::with_capacity(mask.len()); + // We need to account for the validity of the items for chunk in mask.downcast_iter() { - bitmap.extend_from_bitmap(chunk.values()); + match chunk.validity() { + None => bitmap.extend_from_bitmap(chunk.values()), + Some(validity) => bitmap.extend_from_bitmap(&(validity & chunk.values())), + } } let bitmap = bitmap.freeze(); @@ -341,6 +388,11 @@ fn rg_to_dfs_prefiltered( .ok_or(ROW_COUNT_OVERFLOW_ERR)?; } + // We don't need to do any further work if there are no dead columns + if num_dead_columns == 0 { + return Ok(dfs.into_iter().map(|(_, df)| df).collect()); + } + // @TODO: Incorporate this if we how we can properly use it. The problem here is that // different columns really have a different cost when it comes to collecting them. We // would need a cost model to properly estimate this. @@ -378,15 +430,22 @@ fn rg_to_dfs_prefiltered( .into_par_iter() .map(|i| { let col_idx = dead_idx_to_col_idx[i % num_dead_columns]; - let rg_idx = row_groups[i / num_dead_columns].index as usize; + let name = &schema.fields[col_idx].name; let (mask, _) = &dfs[i / num_dead_columns]; - let md = &file_metadata.row_groups[rg_idx]; - debug_assert_eq!(md.num_rows(), mask.len()); + let rg_idx = row_groups[i / num_dead_columns].index; + + #[cfg(debug_assertions)] + { + let md = &file_metadata.row_groups[rg_idx as usize]; + debug_assert_eq!(md.num_rows(), mask.len()); + } + let field_md = part_mds[rg_idx as usize].get_partitions(name).unwrap(); + column_idx_to_series( col_idx, - md, + field_md.as_slice(), Some(Filter::new_masked(mask.clone())), schema, store, @@ -394,22 +453,10 @@ fn rg_to_dfs_prefiltered( }) .collect::>>()?; - let mut rearranged_schema: Schema = Schema::new(); - if let Some(rc) = &row_index { - rearranged_schema.insert_at_index( - 0, - SmartString::from(rc.name.deref()), - IdxType::get_dtype(), - )?; - } - for i in live_idx_to_col_idx.iter().copied() { - rearranged_schema.insert_at_index( - rearranged_schema.len(), - schema.fields[i].name.clone().into(), - schema.fields[i].data_type().into(), - )?; - } - rearranged_schema.merge(Schema::from(schema.as_ref())); + let Some(df) = dfs.first().map(|(_, df)| df) else { + return Ok(Vec::new()); + }; + let rearranged_schema = df.schema(); rg_columns .par_chunks_exact_mut(num_dead_columns) @@ -454,13 +501,17 @@ fn rg_to_dfs_optionally_par_over_columns( for rg_idx in row_group_start..row_group_end { let md = &file_metadata.row_groups[rg_idx]; + + // Set partitioned fields to prevent quadratic behavior. 
+ let projected_columns = projected_columns_set(schema, projection); + let mut part_md = PartitionedColumnChunkMD::new(md); + part_md.set_partitions(projected_columns.as_ref()); + let rg_slice = split_slice_at_file(&mut n_rows_processed, md.num_rows(), slice.0, slice_end); let current_row_count = md.num_rows() as IdxSize; - if use_statistics - && !read_this_row_group(predicate, &file_metadata.row_groups[rg_idx], schema)? - { + if use_statistics && !read_this_row_group(predicate, &part_md, schema)? { *previous_row_count += rg_slice.1 as IdxSize; continue; } @@ -475,9 +526,12 @@ fn rg_to_dfs_optionally_par_over_columns( projection .par_iter() .map(|column_i| { + let name = &schema.fields[*column_i].name; + let part = part_md.get_partitions(name).unwrap(); + column_idx_to_series( *column_i, - md, + part.as_slice(), Some(Filter::new_ranged(rg_slice.0, rg_slice.0 + rg_slice.1)), schema, store, @@ -489,9 +543,12 @@ fn rg_to_dfs_optionally_par_over_columns( projection .iter() .map(|column_i| { + let name = &schema.fields[*column_i].name; + let part = part_md.get_partitions(name).unwrap(); + column_idx_to_series( *column_i, - md, + part.as_slice(), Some(Filter::new_ranged(rg_slice.0, rg_slice.0 + rg_slice.1)), schema, store, @@ -508,7 +565,7 @@ fn rg_to_dfs_optionally_par_over_columns( materialize_hive_partitions(&mut df, schema.as_ref(), hive_partition_columns, rg_slice.1); apply_predicate(&mut df, predicate, true)?; - *previous_row_count = previous_row_count.checked_add(current_row_count).ok_or( + *previous_row_count = previous_row_count.checked_add(current_row_count).ok_or_else(|| polars_err!( ComputeError: "Parquet file produces more than pow(2, 32) rows; \ consider compiling with polars-bigidx feature (polars-u64-idx package on python), \ @@ -566,16 +623,28 @@ fn rg_to_dfs_par_over_rg( } let dfs = POOL.install(|| { + // Set partitioned fields to prevent quadratic behavior. + // Ensure all row groups are partitioned. + let part_mds = { + let projected_columns = projected_columns_set(schema, projection); + row_groups + .par_iter() + .map(|(_, rg, _, _)| { + let mut ccmd = PartitionedColumnChunkMD::new(rg); + ccmd.set_partitions(projected_columns.as_ref()); + ccmd + }) + .collect::>() + }; + row_groups .into_par_iter() - .map(|(rg_idx, md, slice, row_count_start)| { + .enumerate() + .map(|(iter_idx, (_rg_idx, _md, slice, row_count_start))| { + let part_md = &part_mds[iter_idx]; + if slice.1 == 0 - || use_statistics - && !read_this_row_group( - predicate, - &file_metadata.row_groups[rg_idx], - schema, - )? + || use_statistics && !read_this_row_group(predicate, part_md, schema)? 
{ return Ok(None); } @@ -588,9 +657,12 @@ fn rg_to_dfs_par_over_rg( let columns = projection .iter() .map(|column_i| { + let name = &schema.fields[*column_i].name; + let field_md = part_md.get_partitions(name).unwrap(); + column_idx_to_series( *column_i, - md, + field_md.as_slice(), Some(Filter::new_ranged(slice.0, slice.0 + slice.1)), schema, store, @@ -1022,7 +1094,7 @@ impl BatchedParquetReader { // Re-use the same ChunkedArray if ca.len() < max_len { - *ca = ca.new_from_index(max_len, 0); + *ca = ca.new_from_index(0, max_len); } for df in &mut dfs { diff --git a/crates/polars-io/src/utils/byte_source.rs b/crates/polars-io/src/utils/byte_source.rs new file mode 100644 index 000000000000..fce7e795ce46 --- /dev/null +++ b/crates/polars-io/src/utils/byte_source.rs @@ -0,0 +1,176 @@ +use std::ops::Range; +use std::sync::Arc; + +use polars_error::{to_compute_err, PolarsResult}; +use polars_utils::_limit_path_len_io_err; +use polars_utils::mmap::MemSlice; + +use crate::cloud::{ + build_object_store, object_path_from_str, CloudLocation, CloudOptions, ObjectStorePath, + PolarsObjectStore, +}; + +#[allow(async_fn_in_trait)] +pub trait ByteSource: Send + Sync { + async fn get_size(&self) -> PolarsResult; + /// # Panics + /// Panics if `range` is not in bounds. + async fn get_range(&self, range: Range) -> PolarsResult; + async fn get_ranges(&self, ranges: &[Range]) -> PolarsResult>; +} + +/// Byte source backed by a `MemSlice`, which can potentially be memory-mapped. +pub struct MemSliceByteSource(pub MemSlice); + +impl MemSliceByteSource { + async fn try_new_mmap_from_path( + path: &str, + _cloud_options: Option<&CloudOptions>, + ) -> PolarsResult { + let file = Arc::new( + tokio::fs::File::open(path) + .await + .map_err(|err| _limit_path_len_io_err(path.as_ref(), err))? + .into_std() + .await, + ); + let mmap = Arc::new(unsafe { memmap::Mmap::map(file.as_ref()) }.map_err(to_compute_err)?); + + Ok(Self(MemSlice::from_mmap(mmap))) + } +} + +impl ByteSource for MemSliceByteSource { + async fn get_size(&self) -> PolarsResult { + Ok(self.0.as_ref().len()) + } + + async fn get_range(&self, range: Range) -> PolarsResult { + let out = self.0.slice(range); + Ok(out) + } + + async fn get_ranges(&self, ranges: &[Range]) -> PolarsResult> { + Ok(ranges + .iter() + .map(|x| self.0.slice(x.clone())) + .collect::>()) + } +} + +pub struct ObjectStoreByteSource { + store: PolarsObjectStore, + path: ObjectStorePath, +} + +impl ObjectStoreByteSource { + async fn try_new_from_path( + path: &str, + cloud_options: Option<&CloudOptions>, + ) -> PolarsResult { + let (CloudLocation { prefix, .. }, store) = + build_object_store(path, cloud_options, false).await?; + let path = object_path_from_str(&prefix)?; + let store = PolarsObjectStore::new(store); + + Ok(Self { store, path }) + } +} + +impl ByteSource for ObjectStoreByteSource { + async fn get_size(&self) -> PolarsResult { + Ok(self.store.head(&self.path).await?.size) + } + + async fn get_range(&self, range: Range) -> PolarsResult { + let bytes = self.store.get_range(&self.path, range).await?; + let mem_slice = MemSlice::from_bytes(bytes); + + Ok(mem_slice) + } + + async fn get_ranges(&self, ranges: &[Range]) -> PolarsResult> { + let ranges = self.store.get_ranges(&self.path, ranges).await?; + Ok(ranges.into_iter().map(MemSlice::from_bytes).collect()) + } +} + +/// Dynamic dispatch to async functions. 
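// Usage sketch for the `ByteSource` abstraction above: the same async calls serve
// a memory-mapped local file and an object store, and the `DynByteSource` enum
// defined next dispatches between the two at runtime. The function name is
// illustrative.
async fn demo_byte_source(src: &DynByteSource) -> PolarsResult<()> {
    let size = src.get_size().await?;
    // Read at most the first kilobyte of the source.
    let head = src.get_range(0..size.min(1024)).await?;
    assert!(head.len() <= 1024);
    Ok(())
}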
+pub enum DynByteSource { + MemSlice(MemSliceByteSource), + Cloud(ObjectStoreByteSource), +} + +impl DynByteSource { + pub fn variant_name(&self) -> &str { + match self { + Self::MemSlice(_) => "MemSlice", + Self::Cloud(_) => "Cloud", + } + } +} + +impl Default for DynByteSource { + fn default() -> Self { + Self::MemSlice(MemSliceByteSource(MemSlice::default())) + } +} + +impl ByteSource for DynByteSource { + async fn get_size(&self) -> PolarsResult { + match self { + Self::MemSlice(v) => v.get_size().await, + Self::Cloud(v) => v.get_size().await, + } + } + + async fn get_range(&self, range: Range) -> PolarsResult { + match self { + Self::MemSlice(v) => v.get_range(range).await, + Self::Cloud(v) => v.get_range(range).await, + } + } + + async fn get_ranges(&self, ranges: &[Range]) -> PolarsResult> { + match self { + Self::MemSlice(v) => v.get_ranges(ranges).await, + Self::Cloud(v) => v.get_ranges(ranges).await, + } + } +} + +impl From for DynByteSource { + fn from(value: MemSliceByteSource) -> Self { + Self::MemSlice(value) + } +} + +impl From for DynByteSource { + fn from(value: ObjectStoreByteSource) -> Self { + Self::Cloud(value) + } +} + +#[derive(Clone, Debug)] +pub enum DynByteSourceBuilder { + Mmap, + /// Supports both cloud and local files. + ObjectStore, +} + +impl DynByteSourceBuilder { + pub async fn try_build_from_path( + &self, + path: &str, + cloud_options: Option<&CloudOptions>, + ) -> PolarsResult { + Ok(match self { + Self::Mmap => MemSliceByteSource::try_new_mmap_from_path(path, cloud_options) + .await? + .into(), + Self::ObjectStore => ObjectStoreByteSource::try_new_from_path(path, cloud_options) + .await? + .into(), + }) + } +} diff --git a/crates/polars-io/src/utils/mod.rs b/crates/polars-io/src/utils/mod.rs index 5ed22c76561c..87c80b1b5c5a 100644 --- a/crates/polars-io/src/utils/mod.rs +++ b/crates/polars-io/src/utils/mod.rs @@ -3,6 +3,8 @@ mod other; pub use compression::is_compressed; pub use other::*; +#[cfg(feature = "cloud")] +pub mod byte_source; pub mod slice; pub const URL_ENCODE_CHAR_SET: &percent_encoding::AsciiSet = &percent_encoding::CONTROLS diff --git a/crates/polars-io/src/utils/slice.rs b/crates/polars-io/src/utils/slice.rs index 78ff29cf1b29..24a3b7dc1ab8 100644 --- a/crates/polars-io/src/utils/slice.rs +++ b/crates/polars-io/src/utils/slice.rs @@ -1,33 +1,58 @@ /// Given a `slice` that is relative to the start of a list of files, calculate the slice to apply /// at a file with a row offset of `current_row_offset`. pub fn split_slice_at_file( - current_row_offset: &mut usize, + current_row_offset_ref: &mut usize, n_rows_this_file: usize, global_slice_start: usize, global_slice_end: usize, ) -> (usize, usize) { - let next_file_offset = *current_row_offset + n_rows_this_file; - // e.g. - // slice: (start: 1, end: 2) - // files: - // 0: (1 row): current_offset: 0, next_file_offset: 1 - // 1: (1 row): current_offset: 1, next_file_offset: 2 - // 2: (1 row): current_offset: 2, next_file_offset: 3 - // in this example we want to include only file 1. 
- let has_overlap_with_slice = - *current_row_offset < global_slice_end && next_file_offset > global_slice_start; + let current_row_offset = *current_row_offset_ref; + *current_row_offset_ref += n_rows_this_file; + match SplitSlicePosition::split_slice_at_file( + current_row_offset, + n_rows_this_file, + global_slice_start..global_slice_end, + ) { + SplitSlicePosition::Overlapping(offset, len) => (offset, len), + SplitSlicePosition::Before | SplitSlicePosition::After => (0, 0), + } +} + +#[derive(Debug)] +pub enum SplitSlicePosition { + Before, + Overlapping(usize, usize), + After, +} + +impl SplitSlicePosition { + pub fn split_slice_at_file( + current_row_offset: usize, + n_rows_this_file: usize, + global_slice: std::ops::Range, + ) -> Self { + // e.g. + // slice: (start: 1, end: 2) + // files: + // 0: (1 row): current_offset: 0, next_file_offset: 1 + // 1: (1 row): current_offset: 1, next_file_offset: 2 + // 2: (1 row): current_offset: 2, next_file_offset: 3 + // in this example we want to include only file 1. + + let next_row_offset = current_row_offset + n_rows_this_file; - let (rel_start, slice_len) = if !has_overlap_with_slice { - (0, 0) - } else { - let n_rows_to_skip = global_slice_start.saturating_sub(*current_row_offset); - let n_excess_rows = next_file_offset.saturating_sub(global_slice_end); - ( - n_rows_to_skip, - n_rows_this_file - n_rows_to_skip - n_excess_rows, - ) - }; + if next_row_offset <= global_slice.start { + Self::Before + } else if current_row_offset >= global_slice.end { + Self::After + } else { + let n_rows_to_skip = global_slice.start.saturating_sub(current_row_offset); + let n_excess_rows = next_row_offset.saturating_sub(global_slice.end); - *current_row_offset = next_file_offset; - (rel_start, slice_len) + Self::Overlapping( + n_rows_to_skip, + n_rows_this_file - n_rows_to_skip - n_excess_rows, + ) + } + } } diff --git a/crates/polars-json/src/ndjson/file.rs b/crates/polars-json/src/ndjson/file.rs index 1f4af394b78c..08f059b685d2 100644 --- a/crates/polars-json/src/ndjson/file.rs +++ b/crates/polars-json/src/ndjson/file.rs @@ -90,6 +90,7 @@ fn parse_value<'a>(scratch: &'a mut Vec, val: &[u8]) -> PolarsResult PolarsResult { + pub fn collect_schema(&mut self) -> PolarsResult { let mut cached_arenas = self.cached_arena.lock().unwrap(); match &mut *cached_arenas { diff --git a/crates/polars-lazy/src/frame/mod.rs b/crates/polars-lazy/src/frame/mod.rs index 9f81d07a97d3..e97633f4433d 100644 --- a/crates/polars-lazy/src/frame/mod.rs +++ b/crates/polars-lazy/src/frame/mod.rs @@ -67,6 +67,7 @@ impl IntoLazy for LazyFrame { } /// Lazy abstraction over an eager `DataFrame`. +/// /// It really is an abstraction over a logical plan. The methods of this struct will incrementally /// modify a logical plan until output is requested (via [`collect`](crate::frame::LazyFrame::collect)). #[derive(Clone, Default)] @@ -582,10 +583,19 @@ impl LazyFrame { #[allow(unused_mut)] let mut opt_state = self.opt_state; let streaming = self.opt_state.contains(OptState::STREAMING); + let new_streaming = self.opt_state.contains(OptState::NEW_STREAMING); #[cfg(feature = "cse")] - if streaming && self.opt_state.contains(OptState::COMM_SUBPLAN_ELIM) { + if streaming && !new_streaming { opt_state &= !OptState::COMM_SUBPLAN_ELIM; } + + // The new streaming engine can't deal with the way the common + // subexpression elimination adds length-incorrect with_columns. 
+ #[cfg(feature = "cse")] + if new_streaming { + opt_state &= !OptState::COMM_SUBEXPR_ELIM; + } + let lp_top = optimize( self.logical_plan, opt_state, @@ -694,48 +704,45 @@ impl LazyFrame { pub fn collect(self) -> PolarsResult { #[cfg(feature = "new_streaming")] { - let force_new_streaming = self.opt_state.contains(OptState::NEW_STREAMING); - let mut alp_plan = self.to_alp_optimized()?; - let stream_lp_top = alp_plan.lp_arena.add(IR::Sink { - input: alp_plan.lp_top, - payload: SinkType::Memory, - }); - - if force_new_streaming { - return polars_stream::run_query( - stream_lp_top, - alp_plan.lp_arena, - &alp_plan.expr_arena, - ); - } + let auto_new_streaming = + std::env::var("POLARS_AUTO_NEW_STREAMING").as_deref() == Ok("1"); + if self.opt_state.contains(OptState::NEW_STREAMING) || auto_new_streaming { + // Try to run using the new streaming engine, falling back + // if it fails in a todo!() error if auto_new_streaming is set. + let mut new_stream_lazy = self.clone(); + new_stream_lazy.opt_state |= OptState::NEW_STREAMING; + let mut alp_plan = new_stream_lazy.to_alp_optimized()?; + let stream_lp_top = alp_plan.lp_arena.add(IR::Sink { + input: alp_plan.lp_top, + payload: SinkType::Memory, + }); - if std::env::var("POLARS_AUTO_NEW_STREAMING") - .as_deref() - .unwrap_or("") - == "1" - { let f = || { polars_stream::run_query( stream_lp_top, - alp_plan.lp_arena.clone(), - &alp_plan.expr_arena, + alp_plan.lp_arena, + &mut alp_plan.expr_arena, ) }; match std::panic::catch_unwind(std::panic::AssertUnwindSafe(f)) { Ok(r) => return r, Err(e) => { - // Fallback to normal engine if error is due to not being implemented, - // otherwise propagate error. - if e.downcast_ref::<&str>() != Some(&"not yet implemented") { + // Fallback to normal engine if error is due to not being implemented + // and auto_new_streaming is set, otherwise propagate error. + if auto_new_streaming + && e.downcast_ref::<&str>() == Some(&"not yet implemented") + { if polars_core::config::verbose() { eprintln!("caught unimplemented error in new streaming engine, falling back to normal engine"); } + } else { std::panic::resume_unwind(e); } }, } } + let mut alp_plan = self.to_alp_optimized()?; let mut physical_plan = create_physical_plan( alp_plan.lp_top, &mut alp_plan.lp_arena, @@ -1045,7 +1052,7 @@ impl LazyFrame { options.index_column = name.as_ref().into(); } else { let output_field = index_column - .to_field(&self.schema().unwrap(), Context::Default) + .to_field(&self.collect_schema().unwrap(), Context::Default) .unwrap(); return self.with_column(index_column).rolling( Expr::Column(Arc::from(output_field.name().as_str())), @@ -1090,7 +1097,7 @@ impl LazyFrame { options.index_column = name.as_ref().into(); } else { let output_field = index_column - .to_field(&self.schema().unwrap(), Context::Default) + .to_field(&self.collect_schema().unwrap(), Context::Default) .unwrap(); return self.with_column(index_column).group_by_dynamic( Expr::Column(Arc::from(output_field.name().as_str())), @@ -1513,13 +1520,25 @@ impl LazyFrame { /// Apply explode operation. [See eager explode](polars_core::frame::DataFrame::explode). pub fn explode, IE: Into + Clone>(self, columns: E) -> LazyFrame { + self.explode_impl(columns, false) + } + + /// Apply explode operation. [See eager explode](polars_core::frame::DataFrame::explode). 
+ fn explode_impl, IE: Into + Clone>( + self, + columns: E, + allow_empty: bool, + ) -> LazyFrame { let columns = columns .as_ref() .iter() .map(|e| e.clone().into()) .collect::>(); let opt_state = self.get_opt_state(); - let lp = self.get_plan_builder().explode(columns).build(); + let lp = self + .get_plan_builder() + .explode(columns, allow_empty) + .build(); Self::from_logical_plan(lp, opt_state) } @@ -1877,7 +1896,7 @@ impl LazyGroupBy { .collect::>(); self.agg([col("*").exclude(&keys).head(n)]) - .explode([col("*").exclude(&keys)]) + .explode_impl([col("*").exclude(&keys)], true) } /// Return last n rows of each group @@ -1889,7 +1908,7 @@ impl LazyGroupBy { .collect::>(); self.agg([col("*").exclude(&keys).tail(n)]) - .explode([col("*").exclude(&keys)]) + .explode_impl([col("*").exclude(&keys)], true) } /// Apply a function over the groups as a new DataFrame. diff --git a/crates/polars-lazy/src/frame/pivot.rs b/crates/polars-lazy/src/frame/pivot.rs index 759981c52f0e..eedcdc700e1e 100644 --- a/crates/polars-lazy/src/frame/pivot.rs +++ b/crates/polars-lazy/src/frame/pivot.rs @@ -1,3 +1,5 @@ +//! Module containing implementation of the pivot operation. +//! //! Polars lazy does not implement a pivot because it is impossible to know the schema without //! materializing the whole dataset. This makes a pivot quite a terrible operation for performant //! workflows. An optimization can never be pushed down passed a pivot. diff --git a/crates/polars-lazy/src/tests/optimization_checks.rs b/crates/polars-lazy/src/tests/optimization_checks.rs index ffdfedd9bfc5..ecaaba71056d 100644 --- a/crates/polars-lazy/src/tests/optimization_checks.rs +++ b/crates/polars-lazy/src/tests/optimization_checks.rs @@ -495,7 +495,7 @@ fn test_with_column_prune() -> PolarsResult<()> { matches!(lp, SimpleProjection { .. } | DataFrameScan { .. }) })); assert_eq!( - q.schema().unwrap().as_ref(), + q.collect_schema().unwrap().as_ref(), &Schema::from_iter([Field::new("c1", DataType::Int32)]) ); Ok(()) diff --git a/crates/polars-lazy/src/tests/queries.rs b/crates/polars-lazy/src/tests/queries.rs index fe777499812d..d32efc4b295e 100644 --- a/crates/polars-lazy/src/tests/queries.rs +++ b/crates/polars-lazy/src/tests/queries.rs @@ -1440,7 +1440,7 @@ fn test_when_then_schema() -> PolarsResult<()> { .select([when(col("A").gt(lit(1))) .then(Null {}.lit()) .otherwise(col("A"))]) - .schema(); + .collect_schema(); assert_ne!(schema?.get_at_index(0).unwrap().1, &DataType::Null); Ok(()) diff --git a/crates/polars-mem-engine/src/executors/scan/parquet.rs b/crates/polars-mem-engine/src/executors/scan/parquet.rs index a78dbf113151..bc3f69ac95ab 100644 --- a/crates/polars-mem-engine/src/executors/scan/parquet.rs +++ b/crates/polars-mem-engine/src/executors/scan/parquet.rs @@ -187,15 +187,14 @@ impl ParquetExec { readers_and_metadata .into_par_iter() .zip(row_statistics.into_par_iter()) - .enumerate() .map( - |(i, ((reader, _, predicate, projection), (cumulative_read, slice)))| { + |((reader, _, predicate, projection), (cumulative_read, slice))| { let row_index = base_row_index.as_ref().map(|rc| RowIndex { name: rc.name.clone(), offset: rc.offset + cumulative_read as IdxSize, }); - let mut df = reader + let df = reader .with_slice(Some(slice)) .with_row_index(row_index) .with_predicate(predicate.clone()) @@ -210,20 +209,6 @@ impl ParquetExec { )? 
.finish()?; - if let Some(col) = &self.file_options.include_file_paths { - let path = paths[i].to_str().unwrap(); - unsafe { - df.with_column_unchecked( - StringChunked::full( - col, - path, - std::cmp::max(df.height(), slice.1), - ) - .into_series(), - ) - }; - } - Ok(df) }, ) diff --git a/crates/polars-ops/Cargo.toml b/crates/polars-ops/Cargo.toml index 3bbdb10fcaf0..163b45726837 100644 --- a/crates/polars-ops/Cargo.toml +++ b/crates/polars-ops/Cargo.toml @@ -79,10 +79,11 @@ business = ["dtype-date", "chrono"] fused = [] cutqcut = ["dtype-categorical", "dtype-struct"] rle = ["dtype-struct"] -timezones = ["chrono-tz", "chrono"] +timezones = ["chrono", "chrono-tz", "polars-core/temporal", "polars-core/timezones", "polars-core/dtype-datetime"] random = ["rand", "rand_distr"] rank = ["rand"] find_many = ["aho-corasick"] +serde = ["dep:serde", "polars-core/serde"] # extra utilities for BinaryChunked binary_encoding = ["base64", "hex"] @@ -112,7 +113,7 @@ mode = [] search_sorted = [] merge_sorted = [] top_k = [] -pivot = ["polars-core/reinterpret"] +pivot = ["polars-core/reinterpret", "polars-core/dtype-struct"] cross_join = [] chunked_ids = [] asof_join = [] @@ -123,7 +124,7 @@ list_gather = [] list_sets = [] list_any_all = [] list_drop_nulls = [] -list_sample = [] +list_sample = ["polars-core/random"] extract_groups = ["dtype-struct", "polars-core/regex"] is_in = ["polars-core/reinterpret"] hist = ["dtype-categorical", "dtype-struct"] diff --git a/crates/polars-ops/src/chunked_array/hist.rs b/crates/polars-ops/src/chunked_array/hist.rs index d2a0acc76239..455a0c6cc921 100644 --- a/crates/polars-ops/src/chunked_array/hist.rs +++ b/crates/polars-ops/src/chunked_array/hist.rs @@ -3,7 +3,6 @@ use std::fmt::Write; use num_traits::ToPrimitive; use polars_core::prelude::*; use polars_core::with_match_physical_numeric_polars_type; -use polars_utils::float::IsFloat; use polars_utils::total_ord::ToTotalOrd; fn compute_hist( @@ -17,6 +16,7 @@ where T: PolarsNumericType, ChunkedArray: ChunkAgg, { + let mut lower_bound: f64; let (breaks, count) = if let Some(bins) = bins { let mut breaks = Vec::with_capacity(bins.len() + 1); breaks.extend_from_slice(bins); @@ -31,7 +31,7 @@ where // We start with the lower garbage bin. // (-inf, B0] - let mut lower_bound = f64::NEG_INFINITY; + lower_bound = f64::NEG_INFINITY; let mut upper_bound = *breaks_iter.next().unwrap(); for chunk in sorted.downcast_iter() { @@ -60,17 +60,17 @@ where while count.len() < breaks.len() { count.push(0) } + // Push lower bound to infinity + lower_bound = f64::NEG_INFINITY; (breaks, count) } else if ca.null_count() == ca.len() { + lower_bound = f64::NEG_INFINITY; let breaks: Vec = vec![f64::INFINITY]; let count: Vec = vec![0]; (breaks, count) } else { - let min = ChunkAgg::min(ca).unwrap().to_f64().unwrap(); - let max = ChunkAgg::max(ca).unwrap().to_f64().unwrap(); - - let start = min.floor() - 1.0; - let end = max.ceil() + 1.0; + let start = ChunkAgg::min(ca).unwrap().to_f64().unwrap(); + let end = ChunkAgg::max(ca).unwrap().to_f64().unwrap(); // If bin_count is omitted, default to the difference between start and stop (unit bins) let bin_count = if let Some(bin_count) = bin_count { @@ -79,37 +79,24 @@ where (end - start).round() as usize }; - // Calculate the breakpoints and make the array + // Calculate the breakpoints and make the array. The breakpoints form the RHS of the bins. 
let interval = (end - start) / (bin_count as f64); - - let breaks_iter = (0..(bin_count)).map(|b| start + (b as f64) * interval); - + let breaks_iter = (1..(bin_count)).map(|b| start + (b as f64) * interval); let mut breaks = Vec::with_capacity(breaks_iter.size_hint().0 + 1); breaks.extend(breaks_iter); - breaks.push(f64::INFINITY); - let mut count: Vec = vec![0; breaks.len()]; - let end_idx = count.len() - 1; + // Extend the left-most edge by 0.1% of the total range to include the minimum value. + let margin = (end - start) * 0.001; + lower_bound = start - margin; + breaks.push(end); - // start is the closed rhs of the interval, so we subtract the bucket width - let start_range = start - interval; + let mut count: Vec = vec![0; bin_count]; + let max_bin = breaks.len() - 1; for chunk in ca.downcast_iter() { for item in chunk.non_null_values_iter() { - let item = item.to_f64().unwrap() - start_range; - - // This is needed for numeric stability. - // Only for integers. - // we can fall directly on a boundary with an integer. - let item = item / interval; - let item = if !T::Native::is_float() && (item.round() - item).abs() < 0.0000001 { - item.round() - 1.0 - } else { - item.ceil() - 1.0 - }; - - let idx = item as usize; - let idx = std::cmp::min(idx, end_idx); - count[idx] += 1; + let item = item.to_f64().unwrap(); + let bin = ((((item - start) / interval).ceil() - 1.0) as usize).min(max_bin); + count[bin] += 1; } } (breaks, count) @@ -117,7 +104,7 @@ where let mut fields = Vec::with_capacity(3); if include_category { // Use AnyValue for formatting. - let mut lower = AnyValue::Float64(f64::NEG_INFINITY); + let mut lower = AnyValue::Float64(lower_bound); let mut categories = StringChunkedBuilder::new("category", breaks.len()); let mut buf = String::new(); diff --git a/crates/polars-ops/src/chunked_array/top_k.rs b/crates/polars-ops/src/chunked_array/top_k.rs index f5948d0c88a4..9772a5593be0 100644 --- a/crates/polars-ops/src/chunked_array/top_k.rs +++ b/crates/polars-ops/src/chunked_array/top_k.rs @@ -204,6 +204,7 @@ pub fn top_k(s: &[Series], descending: bool) -> PolarsResult { Ok(ca.into_series()) }, DataType::Binary => Ok(top_k_binary_impl(s.binary().unwrap(), k, descending).into_series()), + #[cfg(feature = "dtype-decimal")] DataType::Decimal(_, _) => { let src = src.decimal().unwrap(); let ca = top_k_num_impl(src, k, descending); @@ -212,6 +213,7 @@ pub fn top_k(s: &[Series], descending: bool) -> PolarsResult { Ok(lca.into_series()) }, DataType::Null => Ok(src.slice(0, k)), + #[cfg(feature = "dtype-struct")] DataType::Struct(_) => { // Fallback to more generic impl. 
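For clarity, a small standalone sketch (hypothetical helper name, not code from this PR) of the bin-index arithmetic introduced in the compute_hist hunk above: breakpoints form the right-hand, closed edge of each bin, an item is mapped with a ceiling division, and Rust's saturating float-to-usize cast places the minimum value into bin 0 while the clamp keeps the maximum value in the last bin.

/// Sketch of the bin assignment used above.
fn bin_index(item: f64, start: f64, interval: f64, max_bin: usize) -> usize {
    // `as usize` saturates at 0 for negative values, so `item == start` falls into bin 0.
    ((((item - start) / interval).ceil() - 1.0) as usize).min(max_bin)
}

fn main() {
    // 5 bins of width 2 over [0, 10]: right-closed edges at 2, 4, 6, 8, 10.
    let (start, interval, max_bin) = (0.0, 2.0, 4);
    assert_eq!(bin_index(0.0, start, interval, max_bin), 0); // minimum value
    assert_eq!(bin_index(2.0, start, interval, max_bin), 0); // value on an edge -> lower bin
    assert_eq!(bin_index(4.5, start, interval, max_bin), 2); // falls in (4, 6]
    assert_eq!(bin_index(10.0, start, interval, max_bin), 4); // maximum value, clamped
}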
top_k_by_impl(k, src, &[src.clone()], vec![descending]) diff --git a/crates/polars-ops/src/frame/pivot/unpivot.rs b/crates/polars-ops/src/frame/pivot/unpivot.rs index 3b45b1986fa5..289529d4b4f4 100644 --- a/crates/polars-ops/src/frame/pivot/unpivot.rs +++ b/crates/polars-ops/src/frame/pivot/unpivot.rs @@ -104,7 +104,7 @@ pub trait UnpivotDF: IntoDf { // return empty frame if there are no columns available to use as value vars if index.len() == self_.width() { let variable_col = Series::new_empty(variable_name, &DataType::String); - let value_col = Series::new_empty(variable_name, &DataType::Null); + let value_col = Series::new_empty(value_name, &DataType::Null); let mut out = self_.select(index).unwrap().clear().take_columns(); out.push(variable_col); @@ -193,6 +193,7 @@ impl UnpivotDF for DataFrame {} #[cfg(test)] mod test { use polars_core::df; + use polars_core::utils::Container; use super::*; @@ -205,12 +206,31 @@ mod test { ) .unwrap(); + // Specify on and index let unpivoted = df.unpivot(["C", "D"], ["A", "B"])?; + assert_eq!( + unpivoted.get_column_names(), + &["A", "B", "variable", "value"] + ); assert_eq!( Vec::from(unpivoted.column("value")?.i32()?), &[Some(10), Some(11), Some(12), Some(2), Some(4), Some(6)] ); + // Specify custom column names + let args = UnpivotArgsIR { + on: vec!["C".into(), "D".into()], + index: vec!["A".into(), "B".into()], + variable_name: Some("custom_variable".into()), + value_name: Some("custom_value".into()), + }; + let unpivoted = df.unpivot2(args).unwrap(); + assert_eq!( + unpivoted.get_column_names(), + &["A", "B", "custom_variable", "custom_value"] + ); + + // Specify neither on nor index let args = UnpivotArgsIR { on: vec![], index: vec![], @@ -218,6 +238,7 @@ mod test { }; let unpivoted = df.unpivot2(args).unwrap(); + assert_eq!(unpivoted.get_column_names(), &["variable", "value"]); let value = unpivoted.column("value")?; // String because of supertype let value = value.str()?; @@ -227,6 +248,7 @@ mod test { &["a", "b", "a", "1", "3", "5", "10", "11", "12", "2", "4", "6"] ); + // Specify index but not on let args = UnpivotArgsIR { on: vec![], index: vec!["A".into()], @@ -234,6 +256,7 @@ mod test { }; let unpivoted = df.unpivot2(args).unwrap(); + assert_eq!(unpivoted.get_column_names(), &["A", "variable", "value"]); let value = unpivoted.column("value")?; let value = value.i32()?; let value = value.into_no_null_iter().collect::>(); @@ -243,6 +266,20 @@ mod test { let variable = variable.into_no_null_iter().collect::>(); assert_eq!(variable, &["B", "B", "B", "C", "C", "C", "D", "D", "D"]); assert!(unpivoted.column("A").is_ok()); + + // Specify all columns in index + let args = UnpivotArgsIR { + on: vec![], + index: vec!["A".into(), "B".into(), "C".into(), "D".into()], + ..Default::default() + }; + let unpivoted = df.unpivot2(args).unwrap(); + assert_eq!( + unpivoted.get_column_names(), + &["A", "B", "C", "D", "variable", "value"] + ); + assert_eq!(unpivoted.len(), 0); + Ok(()) } } diff --git a/crates/polars-parquet/src/arrow/read/deserialize/binary/basic.rs b/crates/polars-parquet/src/arrow/read/deserialize/binary/basic.rs index 2d56f09d0af1..67e895fd173e 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/binary/basic.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/binary/basic.rs @@ -144,6 +144,10 @@ impl<'a, 'b, O: Offset> BatchableCollector<(), Binary> for DeltaCollector<'a, target.extend_constant(n); Ok(()) } + + fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { + self.decoder.skip_in_place(n) + } } impl<'a, 'b, 
O: Offset> BatchableCollector<(), Binary> for DeltaBytesCollector<'a, 'b, O> { @@ -159,6 +163,10 @@ impl<'a, 'b, O: Offset> BatchableCollector<(), Binary> for DeltaBytesCollecto target.extend_constant(n); Ok(()) } + + fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { + self.decoder.skip_in_place(n) + } } impl<'a, O: Offset> StateTranslation<'a, BinaryDecoder> for BinaryStateTranslation<'a> { @@ -210,7 +218,7 @@ impl<'a, O: Offset> StateTranslation<'a, BinaryDecoder> for BinaryStateTransl page.dict, additional, )?, - T::Delta(ref mut page) => { + T::DeltaLengthByteArray(ref mut page, ref mut _lengths) => { let (values, validity) = decoded; let mut collector = DeltaCollector { diff --git a/crates/polars-parquet/src/arrow/read/deserialize/binary/decoders.rs b/crates/polars-parquet/src/arrow/read/deserialize/binary/decoders.rs index 53c25d8050b9..fc98e039229e 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/binary/decoders.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/binary/decoders.rs @@ -39,7 +39,7 @@ impl<'a> ValuesDictionary<'a> { pub(crate) enum BinaryStateTranslation<'a> { Plain(BinaryIter<'a>), Dictionary(ValuesDictionary<'a>), - Delta(delta_length_byte_array::Decoder<'a>), + DeltaLengthByteArray(delta_length_byte_array::Decoder<'a>, Vec), DeltaBytes(delta_byte_array::Decoder<'a>), } @@ -67,8 +67,9 @@ impl<'a> BinaryStateTranslation<'a> { }, (Encoding::DeltaLengthByteArray, _) => { let values = split_buffer(page)?.values; - Ok(BinaryStateTranslation::Delta( + Ok(BinaryStateTranslation::DeltaLengthByteArray( delta_length_byte_array::Decoder::try_new(values)?, + Vec::new(), )) }, (Encoding::DeltaByteArray, _) => { @@ -84,7 +85,7 @@ impl<'a> BinaryStateTranslation<'a> { match self { Self::Plain(v) => v.len_when_not_nullable(), Self::Dictionary(v) => v.len(), - Self::Delta(v) => v.len(), + Self::DeltaLengthByteArray(v, _) => v.len(), Self::DeltaBytes(v) => v.len(), } } @@ -97,7 +98,7 @@ impl<'a> BinaryStateTranslation<'a> { match self { Self::Plain(t) => _ = t.by_ref().nth(n - 1), Self::Dictionary(t) => t.values.skip_in_place(n)?, - Self::Delta(t) => t.skip_in_place(n)?, + Self::DeltaLengthByteArray(t, _) => t.skip_in_place(n)?, Self::DeltaBytes(t) => t.skip_in_place(n)?, } diff --git a/crates/polars-parquet/src/arrow/read/deserialize/binview.rs b/crates/polars-parquet/src/arrow/read/deserialize/binview.rs index bf6f4bf97f1d..be615035addb 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/binview.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/binview.rs @@ -10,7 +10,7 @@ use arrow::datatypes::{ArrowDataType, PhysicalType}; use super::binary::decoders::*; use super::utils::{freeze_validity, BatchableCollector}; -use crate::parquet::encoding::delta_bitpacked::DeltaGatherer; +use crate::parquet::encoding::delta_bitpacked::{lin_natural_sum, DeltaGatherer}; use crate::parquet::encoding::hybrid_rle::{self, DictionaryTranslator}; use crate::parquet::encoding::{delta_byte_array, delta_length_byte_array}; use crate::parquet::error::{ParquetError, ParquetResult}; @@ -85,23 +85,27 @@ impl<'a> StateTranslation<'a, BinViewDecoder> for BinaryStateTranslation<'a> { // Already done in decode_plain_encoded validate_utf8 = false; }, - Self::Delta(ref mut page_values) => { + Self::DeltaLengthByteArray(ref mut page_values, ref mut lengths) => { let (values, validity) = decoded; let mut collector = DeltaCollector { + gatherer: &mut StatGatherer::default(), + pushed_lengths: lengths, decoder: page_values, }; match page_validity { - None => 
collector.push_n(values, additional)?, + None => (&mut collector).push_n(values, additional)?, Some(page_validity) => extend_from_decoder( validity, page_validity, Some(additional), values, - collector, + &mut collector, )?, } + + collector.flush(values); }, Self::DeltaBytes(ref mut page_values) => { let (values, validity) = decoded; @@ -147,6 +151,12 @@ impl utils::ExactSize for DecodedStateTuple { } pub(crate) struct DeltaCollector<'a, 'b> { + // We gatherer the decoded lengths into `pushed_lengths`. Then, we `flush` those to the + // `BinView` This allows us to group many memcopies into one and take better potential fast + // paths for inlineable views and such. + pub(crate) gatherer: &'b mut StatGatherer, + pub(crate) pushed_lengths: &'b mut Vec, + pub(crate) decoder: &'b mut delta_length_byte_array::Decoder<'a>, } @@ -154,44 +164,148 @@ pub(crate) struct DeltaBytesCollector<'a, 'b> { pub(crate) decoder: &'b mut delta_byte_array::Decoder<'a>, } -pub(crate) struct ViewGatherer<'a, 'b> { - values: &'a [u8], - offset: &'b mut usize, +/// A [`DeltaGatherer`] that gathers the minimum, maximum and summation of the values as `usize`s. +pub(crate) struct StatGatherer { + min: usize, + max: usize, + sum: usize, +} + +impl Default for StatGatherer { + fn default() -> Self { + Self { + min: usize::MAX, + max: usize::MIN, + sum: 0, + } + } } -impl<'a, 'b> DeltaGatherer for ViewGatherer<'a, 'b> { - type Target = MutableBinaryViewArray<[u8]>; +impl DeltaGatherer for StatGatherer { + type Target = Vec; fn target_len(&self, target: &Self::Target) -> usize { target.len() } fn target_reserve(&self, target: &mut Self::Target, n: usize) { - target.views_mut().reserve(n) + target.reserve(n); } fn gather_one(&mut self, target: &mut Self::Target, v: i64) -> ParquetResult<()> { + if v < 0 { + return Err(ParquetError::oos("DELTA_LENGTH_BYTE_ARRAY length < 0")); + } + + if v > i64::from(u32::MAX) { + return Err(ParquetError::not_supported( + "DELTA_LENGTH_BYTE_ARRAY length > u32::MAX", + )); + } + let v = v as usize; - let s = &self.values[*self.offset..*self.offset + v]; - *self.offset += v; - target.push(Some(s)); + + self.min = self.min.min(v); + self.max = self.max.max(v); + self.sum += v; + + target.push(v as u32); + + Ok(()) + } + + fn gather_slice(&mut self, target: &mut Self::Target, slice: &[i64]) -> ParquetResult<()> { + let mut is_invalid = false; + let mut is_too_large = false; + + target.extend(slice.iter().map(|&v| { + is_invalid |= v < 0; + is_too_large |= v > i64::from(u32::MAX); + + let v = v as usize; + + self.min = self.min.min(v); + self.max = self.max.max(v); + self.sum += v; + + v as u32 + })); + + if is_invalid { + target.truncate(target.len() - slice.len()); + return Err(ParquetError::oos("DELTA_LENGTH_BYTE_ARRAY length < 0")); + } + + if is_too_large { + return Err(ParquetError::not_supported( + "DELTA_LENGTH_BYTE_ARRAY length > u32::MAX", + )); + } + + Ok(()) + } + + fn gather_constant( + &mut self, + target: &mut Self::Target, + v: i64, + delta: i64, + num_repeats: usize, + ) -> ParquetResult<()> { + if v < 0 || (delta < 0 && num_repeats > 0 && (num_repeats - 1) as i64 * delta + v < 0) { + return Err(ParquetError::oos("DELTA_LENGTH_BYTE_ARRAY length < 0")); + } + + if v > i64::from(u32::MAX) || v + ((num_repeats - 1) as i64) * delta > i64::from(u32::MAX) { + return Err(ParquetError::not_supported( + "DELTA_LENGTH_BYTE_ARRAY length > u32::MAX", + )); + } + + target.extend((0..num_repeats).map(|i| (v + (i as i64) * delta) as u32)); + + let vstart = v; + let vend = v + (num_repeats - 1) 
as i64 * delta; + + let (min, max) = if delta < 0 { + (vend, vstart) + } else { + (vstart, vend) + }; + + let sum = lin_natural_sum(v, delta, num_repeats) as usize; + + #[cfg(debug_assertions)] + { + assert_eq!( + (0..num_repeats) + .map(|i| (v + (i as i64) * delta) as usize) + .sum::(), + sum + ); + } + + self.min = self.min.min(min as usize); + self.max = self.max.max(max as usize); + self.sum += sum; + Ok(()) } } -impl<'a, 'b> BatchableCollector<(), MutableBinaryViewArray<[u8]>> for DeltaCollector<'a, 'b> { +impl<'a, 'b> BatchableCollector<(), MutableBinaryViewArray<[u8]>> for &mut DeltaCollector<'a, 'b> { fn reserve(target: &mut MutableBinaryViewArray<[u8]>, n: usize) { target.views_mut().reserve(n); } - fn push_n(&mut self, target: &mut MutableBinaryViewArray<[u8]>, n: usize) -> ParquetResult<()> { - let mut gatherer = ViewGatherer { - values: self.decoder.values, - offset: &mut self.decoder.offset, - }; + fn push_n( + &mut self, + _target: &mut MutableBinaryViewArray<[u8]>, + n: usize, + ) -> ParquetResult<()> { self.decoder .lengths - .gather_n_into(target, n, &mut gatherer)?; + .gather_n_into(self.pushed_lengths, n, self.gatherer)?; Ok(()) } @@ -201,9 +315,34 @@ impl<'a, 'b> BatchableCollector<(), MutableBinaryViewArray<[u8]>> for DeltaColle target: &mut MutableBinaryViewArray<[u8]>, n: usize, ) -> ParquetResult<()> { + self.flush(target); target.extend_constant(n, >::None); Ok(()) } + + fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { + self.decoder.skip_in_place(n) + } +} + +impl<'a, 'b> DeltaCollector<'a, 'b> { + pub fn flush(&mut self, target: &mut MutableBinaryViewArray<[u8]>) { + if !self.pushed_lengths.is_empty() { + unsafe { + target.extend_from_lengths_with_stats( + &self.decoder.values[self.decoder.offset..], + self.pushed_lengths.iter().map(|&v| v as usize), + self.gatherer.min, + self.gatherer.max, + self.gatherer.sum, + ) + }; + + self.decoder.offset += self.gatherer.sum; + self.pushed_lengths.clear(); + *self.gatherer = StatGatherer::default(); + } + } } impl<'a, 'b> BatchableCollector<(), MutableBinaryViewArray<[u8]>> for DeltaBytesCollector<'a, 'b> { @@ -291,6 +430,10 @@ impl<'a, 'b> BatchableCollector<(), MutableBinaryViewArray<[u8]>> for DeltaBytes target.extend_constant(n, >::None); Ok(()) } + + fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { + self.decoder.skip_in_place(n) + } } impl utils::Decoder for BinViewDecoder { diff --git a/crates/polars-parquet/src/arrow/read/deserialize/boolean.rs b/crates/polars-parquet/src/arrow/read/deserialize/boolean.rs index 1f33da0678d6..154e46b41c18 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/boolean.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/boolean.rs @@ -165,6 +165,10 @@ impl<'a, 'b> BatchableCollector for BitmapCollector<'a, 'b> target.extend_constant(n, false); Ok(()) } + + fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { + self.0.skip_in_place(n) + } } impl ExactSize for (MutableBitmap, MutableBitmap) { diff --git a/crates/polars-parquet/src/arrow/read/deserialize/dictionary.rs b/crates/polars-parquet/src/arrow/read/deserialize/dictionary.rs index 4a7b8f740063..ab8098adbdd6 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/dictionary.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/dictionary.rs @@ -191,6 +191,10 @@ impl<'a, 'b, K: DictionaryKey> BatchableCollector<(), Vec> for DictArrayColle target.resize(target.len() + n, K::default()); Ok(()) } + + fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { + 
self.values.skip_in_place(n) + } } impl Translator for DictArrayTranslator { diff --git a/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary.rs b/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary.rs index 747243ce26ef..c423ab919091 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary.rs @@ -16,6 +16,7 @@ pub(crate) enum StateTranslation<'a> { Dictionary(hybrid_rle::HybridRleDecoder<'a>, &'a Vec), } +#[derive(Debug)] pub struct FixedSizeBinary { pub values: Vec, pub size: usize, @@ -164,6 +165,12 @@ impl Decoder for BinaryDecoder { target.resize(target.len() + n * self.size, 0); Ok(()) } + + fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { + let n = usize::min(n, self.slice.len() / self.size); + *self.slice = &self.slice[n * self.size..]; + Ok(()) + } } let mut collector = FixedSizeBinaryCollector { diff --git a/crates/polars-parquet/src/arrow/read/deserialize/nested.rs b/crates/polars-parquet/src/arrow/read/deserialize/nested.rs index e200d3c1a8da..05360a08d7d7 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/nested.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/nested.rs @@ -202,6 +202,17 @@ pub fn columns_to_iter_recursive( )? .collect_n(filter)? }, + Binary | Utf8 => { + init.push(InitNested::Primitive(field.is_nullable)); + types.pop(); + PageNestedDecoder::new( + columns.pop().unwrap(), + field.data_type().clone(), + binary::BinaryDecoder::::default(), + init, + )? + .collect_n(filter)? + }, _ => match field.data_type().to_logical_type() { ArrowDataType::Dictionary(key_type, _, _) => { init.push(InitNested::Primitive(field.is_nullable)); diff --git a/crates/polars-parquet/src/arrow/read/deserialize/nested_utils.rs b/crates/polars-parquet/src/arrow/read/deserialize/nested_utils.rs index fd135a9b63ac..e9c122a6e4b4 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/nested_utils.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/nested_utils.rs @@ -140,7 +140,7 @@ impl Nested { fn invalid_num_values(&self) -> usize { match &self.content { - NestedContent::Primitive => 0, + NestedContent::Primitive => 1, NestedContent::List { .. } => 0, NestedContent::FixedSizeList { width } => *width, NestedContent::Struct => 1, @@ -204,6 +204,10 @@ impl<'a, 'b, 'c, D: utils::NestedDecoder> BatchableCollector<(), D::DecodedState self.decoder.push_n_nulls(self.state, target, n); Ok(()) } + + fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { + self.state.skip_in_place(n) + } } /// The initial info of nested data types. @@ -290,6 +294,67 @@ impl NestedState { } } +/// Calculate the number of leaf values that are covered by the first `limit` definition level +/// values. 
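The idea behind the limit_to_num_values helper added below, sketched standalone over a plain slice of levels (the real code gathers from a HybridRleDecoder): only positions whose definition level equals the leaf (maximum) level carry a physical leaf value, so counting them over the first `limit` levels tells the batched collector how many buffered values to skip. The names and level assignments in this sketch are illustrative, not taken from the PR.

/// Sketch: count leaf values among the first `limit` definition levels.
fn num_leaf_values(def_levels: &[u16], leaf_def_level: u16, limit: usize) -> usize {
    def_levels
        .iter()
        .take(limit)
        .filter(|&&lvl| lvl == leaf_def_level)
        .count()
}

fn main() {
    // Hypothetical levels for an optional list of optional ints:
    // 3 = present leaf, 2 = null leaf, 1 = empty list, 0 = null list.
    let defs = [3, 3, 1, 2, 3, 0, 3];
    // The first 5 definition levels cover 3 physical leaf values in the values buffer.
    assert_eq!(num_leaf_values(&defs, 3, 5), 3);
}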
+fn limit_to_num_values( + def_iter: &HybridRleDecoder<'_>, + def_levels: &[u16], + limit: usize, +) -> ParquetResult { + struct NumValuesGatherer { + leaf_def_level: u16, + } + struct NumValuesState { + num_values: usize, + length: usize, + } + + impl HybridRleGatherer for NumValuesGatherer { + type Target = NumValuesState; + + fn target_reserve(&self, _target: &mut Self::Target, _n: usize) {} + + fn target_num_elements(&self, target: &Self::Target) -> usize { + target.length + } + + fn hybridrle_to_target(&self, value: u32) -> ParquetResult { + Ok(value) + } + + fn gather_one(&self, target: &mut Self::Target, value: u32) -> ParquetResult<()> { + target.num_values += usize::from(value == self.leaf_def_level as u32); + target.length += 1; + Ok(()) + } + + fn gather_repeated( + &self, + target: &mut Self::Target, + value: u32, + n: usize, + ) -> ParquetResult<()> { + target.num_values += n * usize::from(value == self.leaf_def_level as u32); + target.length += n; + Ok(()) + } + } + + let mut state = NumValuesState { + num_values: 0, + length: 0, + }; + def_iter.clone().gather_n_into( + &mut state, + limit, + &NumValuesGatherer { + leaf_def_level: *def_levels.last().unwrap(), + }, + )?; + + Ok(state.num_values) +} + fn idx_to_limit(rep_iter: &HybridRleDecoder<'_>, idx: usize) -> ParquetResult { struct RowIdxOffsetGatherer; struct RowIdxOffsetState { @@ -384,7 +449,7 @@ fn extend_offsets2<'a, D: utils::NestedDecoder>( >, nested: &mut [Nested], filter: Option, - // Amortized allocations + def_levels: &[u16], rep_levels: &[u16], ) -> PolarsResult<()> { @@ -416,6 +481,9 @@ fn extend_offsets2<'a, D: utils::NestedDecoder>( if start > 0 { let start_cell = idx_to_limit(&rep_iter, start)?; + let num_skipped_values = limit_to_num_values(&def_iter, def_levels, start_cell)?; + batched_collector.skip_in_place(num_skipped_values)?; + rep_iter.skip_in_place(start_cell)?; def_iter.skip_in_place(start_cell)?; } @@ -436,6 +504,8 @@ fn extend_offsets2<'a, D: utils::NestedDecoder>( // @NOTE: This is kind of unused let last_skip = def_iter.len(); + let num_skipped_values = limit_to_num_values(&def_iter, def_levels, last_skip)?; + batched_collector.skip_in_place(num_skipped_values)?; rep_iter.skip_in_place(last_skip)?; def_iter.skip_in_place(last_skip)?; @@ -447,6 +517,8 @@ fn extend_offsets2<'a, D: utils::NestedDecoder>( let num_zeros = iter.take_leading_zeros(); if num_zeros > 0 { let offset = idx_to_limit(&rep_iter, num_zeros)?; + let num_skipped_values = limit_to_num_values(&def_iter, def_levels, offset)?; + batched_collector.skip_in_place(num_skipped_values)?; rep_iter.skip_in_place(offset)?; def_iter.skip_in_place(offset)?; } @@ -601,23 +673,16 @@ fn extend_offsets_limited<'a, D: utils::NestedDecoder>( } } - if embed_depth == max_depth - 1 { - for _ in 0..num_elements { - batched_collector.push_invalid(); - } - - break; - } - let embed_num_values = embed_nest.invalid_num_values(); + num_elements *= embed_num_values; if embed_num_values == 0 { break; } - - num_elements *= embed_num_values; } + batched_collector.push_n_invalids(num_elements); + break; } @@ -705,6 +770,7 @@ impl PageNestedDecoder { break; }; let page = page?; + let page = page.decompress(&mut self.iter)?; let mut state = utils::State::new_nested(&self.decoder, &page, self.dict.as_ref())?; @@ -743,9 +809,11 @@ impl PageNestedDecoder { break; }; let page = page?; + // We cannot lazily decompress because we don't have the number of leaf values + // at this point. This is encoded within the `definition level` values. *sign*. 
+ // In general, lazy decompression is quite difficult with nested values. + let page = page.decompress(&mut self.iter)?; - let mut state = - utils::State::new_nested(&self.decoder, &page, self.dict.as_ref())?; let (def_iter, rep_iter) = level_iters(&page)?; let mut count = ZeroCount::default(); @@ -762,6 +830,9 @@ impl PageNestedDecoder { None }; + let mut state = + utils::State::new_nested(&self.decoder, &page, self.dict.as_ref())?; + let start_length = nested_state.len(); // @TODO: move this to outside the loop. diff --git a/crates/polars-parquet/src/arrow/read/deserialize/null.rs b/crates/polars-parquet/src/arrow/read/deserialize/null.rs index 8c28a7fc66bb..b2ce451ced5d 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/null.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/null.rs @@ -12,6 +12,7 @@ use crate::parquet::error::ParquetResult; use crate::parquet::page::{DataPage, DictPage}; pub(crate) struct NullDecoder; +#[derive(Debug)] pub(crate) struct NullArrayLength { length: usize, } @@ -136,18 +137,20 @@ pub fn iter_to_arrays( }; let page = page?; - let rows = page.num_values(); - let page_filter; - (page_filter, filter) = Filter::opt_split_at(&filter, rows); + let state_filter; + (state_filter, filter) = Filter::opt_split_at(&filter, page.num_values()); - let num_rows = match page_filter { - None => rows, + // Skip the whole page if we don't need any rows from it + if state_filter.as_ref().is_some_and(|f| f.num_rows() == 0) { + continue; + } + + let num_rows = match state_filter { + None => page.num_values(), Some(filter) => filter.num_rows(), }; len = (len + num_rows).min(num_rows); - - iter.reuse_page_buffer(page); } Ok(Box::new(NullArray::new(data_type, len))) diff --git a/crates/polars-parquet/src/arrow/read/deserialize/primitive/basic.rs b/crates/polars-parquet/src/arrow/read/deserialize/primitive/basic.rs index ce658b764412..696463eefa39 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/primitive/basic.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/primitive/basic.rs @@ -147,6 +147,11 @@ where target.resize(target.len() + n, T::default()); Ok(()) } + + fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { + self.chunks.skip_in_place(n); + Ok(()) + } } #[allow(clippy::large_enum_variant)] @@ -206,7 +211,7 @@ where } match self { - Self::Plain(t) => _ = t.nth(n - 1), + Self::Plain(t) => t.skip_in_place(n), Self::Dictionary(t) => t.values.skip_in_place(n)?, Self::ByteStreamSplit(t) => _ = t.iter_converted(|_| ()).nth(n - 1), } diff --git a/crates/polars-parquet/src/arrow/read/deserialize/primitive/integer.rs b/crates/polars-parquet/src/arrow/read/deserialize/primitive/integer.rs index 45518947e0a1..1a767981d291 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/primitive/integer.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/primitive/integer.rs @@ -84,7 +84,7 @@ where } match self { - Self::Plain(v) => _ = v.nth(n - 1), + Self::Plain(v) => v.skip_in_place(n), Self::Dictionary(v) => v.values.skip_in_place(n)?, Self::ByteStreamSplit(v) => _ = v.iter_converted(|_| ()).nth(n - 1), Self::DeltaBinaryPacked(v) => v.skip_in_place(n)?, diff --git a/crates/polars-parquet/src/arrow/read/deserialize/primitive/mod.rs b/crates/polars-parquet/src/arrow/read/deserialize/primitive/mod.rs index 45c95a7d5ee1..22da6ff14895 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/primitive/mod.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/primitive/mod.rs @@ -109,4 +109,8 @@ where target.resize(target.len() + 
n, T::default()); Ok(()) } + + fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { + self.decoder.skip_in_place(n) + } } diff --git a/crates/polars-parquet/src/arrow/read/deserialize/utils/array_chunks.rs b/crates/polars-parquet/src/arrow/read/deserialize/utils/array_chunks.rs index f95be359631d..330ad77a7c44 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/utils/array_chunks.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/utils/array_chunks.rs @@ -24,6 +24,11 @@ impl<'a, P: ParquetNativeType> ArrayChunks<'a, P> { Some(Self { bytes }) } + + pub(crate) fn skip_in_place(&mut self, n: usize) { + let n = usize::min(self.bytes.len(), n); + self.bytes = &self.bytes[n..]; + } } impl<'a, P: ParquetNativeType> Iterator for ArrayChunks<'a, P> { @@ -36,13 +41,6 @@ impl<'a, P: ParquetNativeType> Iterator for ArrayChunks<'a, P> { Some(item) } - #[inline(always)] - fn nth(&mut self, n: usize) -> Option { - let item = self.bytes.get(n)?; - self.bytes = &self.bytes[n + 1..]; - Some(item) - } - #[inline(always)] fn size_hint(&self) -> (usize, Option) { (self.bytes.len(), Some(self.bytes.len())) diff --git a/crates/polars-parquet/src/arrow/read/deserialize/utils/mod.rs b/crates/polars-parquet/src/arrow/read/deserialize/utils/mod.rs index b96f7b6a429c..9c85c14edb0c 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/utils/mod.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/utils/mod.rs @@ -112,6 +112,11 @@ impl<'a, D: Decoder> State<'a, D> { match filter { None => { let num_rows = self.len(); + + if num_rows == 0 { + return Ok(()); + } + self.translation.extend_from_state( decoder, decoded, @@ -126,12 +131,16 @@ impl<'a, D: Decoder> State<'a, D> { self.skip_in_place(start)?; debug_assert!(end - start <= self.len()); - self.translation.extend_from_state( - decoder, - decoded, - &mut self.page_validity, - end - start, - )?; + + if end - start > 0 { + self.translation.extend_from_state( + decoder, + decoded, + &mut self.page_validity, + end - start, + )?; + } + Ok(()) }, Filter::Mask(bitmap) => { @@ -142,12 +151,15 @@ impl<'a, D: Decoder> State<'a, D> { let prev_state_len = self.len(); let num_ones = iter.take_leading_ones(); - self.translation.extend_from_state( - decoder, - decoded, - &mut self.page_validity, - num_ones, - )?; + + if num_ones > 0 { + self.translation.extend_from_state( + decoder, + decoded, + &mut self.page_validity, + num_ones, + )?; + } if iter.num_remaining() == 0 || self.len() == 0 { break; @@ -171,11 +183,9 @@ impl<'a, D: Decoder> State<'a, D> { pub fn not_implemented(page: &DataPage) -> ParquetError { let is_optional = page.descriptor.primitive_type.field_info.repetition == Repetition::Optional; - let is_filtered = page.selected_rows().is_some(); let required = if is_optional { "optional" } else { "required" }; - let is_filtered = if is_filtered { ", index-filtered" } else { "" }; ParquetError::not_supported(format!( - "Decoding {:?} \"{:?}\"-encoded {required}{is_filtered} parquet pages not yet supported", + "Decoding {:?} \"{:?}\"-encoded {required} parquet pages not yet supported", page.descriptor.primitive_type.physical_type, page.encoding(), )) @@ -185,14 +195,15 @@ pub trait BatchableCollector { fn reserve(target: &mut T, n: usize); fn push_n(&mut self, target: &mut T, n: usize) -> ParquetResult<()>; fn push_n_nulls(&mut self, target: &mut T, n: usize) -> ParquetResult<()>; + fn skip_in_place(&mut self, n: usize) -> ParquetResult<()>; } /// This batches sequential collect operations to try and prevent unnecessary buffering and 
/// `Iterator::next` polling. #[must_use] pub struct BatchedCollector<'a, I, T, C: BatchableCollector> { - num_waiting_valids: usize, - num_waiting_invalids: usize, + pub(crate) num_waiting_valids: usize, + pub(crate) num_waiting_invalids: usize, target: &'a mut T, collector: C, @@ -243,6 +254,24 @@ impl<'a, I, T, C: BatchableCollector> BatchedCollector<'a, I, T, C> { self.num_waiting_invalids += n; } + #[inline] + pub fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { + if self.num_waiting_valids > 0 { + self.collector + .push_n(self.target, self.num_waiting_valids)?; + self.num_waiting_valids = 0; + } + if self.num_waiting_invalids > 0 { + self.collector + .push_n_nulls(self.target, self.num_waiting_invalids)?; + self.num_waiting_invalids = 0; + } + + self.collector.skip_in_place(n)?; + + Ok(()) + } + #[inline] pub fn finalize(mut self) -> ParquetResult<()> { self.collector @@ -403,6 +432,11 @@ where target.resize(target.len() + n, O::default()); Ok(()) } + + #[inline] + fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { + self.decoder.skip_in_place(n) + } } pub struct GatheredHybridRle<'a, 'b, 'c, O, G> @@ -453,6 +487,11 @@ where .gather_repeated(target, self.null_value.clone(), n)?; Ok(()) } + + #[inline] + fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { + self.decoder.skip_in_place(n) + } } impl<'a, 'b, 'c, O, Out, G> BatchableCollector> @@ -480,6 +519,11 @@ where .gather_repeated(target, self.null_value.clone(), n)?; Ok(()) } + + #[inline] + fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { + self.decoder.skip_in_place(n) + } } impl<'a, 'b, 'c, T> BatchableCollector> @@ -513,6 +557,11 @@ where target.extend_null(n); Ok(()) } + + #[inline] + fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { + self.decoder.skip_in_place(n) + } } impl, I: Iterator> BatchableCollector for I { @@ -532,6 +581,14 @@ impl, I: Iterator> BatchableCollector for I { target.extend_null_constant(n); Ok(()) } + + #[inline] + fn skip_in_place(&mut self, n: usize) -> ParquetResult<()> { + if n > 0 { + _ = self.nth(n - 1); + } + Ok(()) + } } /// An item with a known size @@ -653,21 +710,22 @@ impl PageDecoder { while num_rows_remaining > 0 { let Some(page) = self.iter.next() else { - return self.decoder.finalize(self.data_type, self.dict, target); + break; }; let page = page?; - let mut state = State::new(&self.decoder, &page, self.dict.as_ref())?; - let state_len = state.len(); - let state_filter; - (state_filter, filter) = Filter::opt_split_at(&filter, state_len); + (state_filter, filter) = Filter::opt_split_at(&filter, page.num_values()); // Skip the whole page if we don't need any rows from it if state_filter.as_ref().is_some_and(|f| f.num_rows() == 0) { continue; } + let page = page.decompress(&mut self.iter)?; + + let mut state = State::new(&self.decoder, &page, self.dict.as_ref())?; + let start_length = target.len(); state.extend_from_state(&mut self.decoder, &mut target, state_filter)?; let end_length = target.len(); diff --git a/crates/polars-parquet/src/arrow/read/indexes/binary.rs b/crates/polars-parquet/src/arrow/read/indexes/binary.rs deleted file mode 100644 index b6e017644746..000000000000 --- a/crates/polars-parquet/src/arrow/read/indexes/binary.rs +++ /dev/null @@ -1,44 +0,0 @@ -use arrow::array::{Array, BinaryArray, PrimitiveArray, Utf8Array}; -use arrow::datatypes::{ArrowDataType, PhysicalType}; -use arrow::trusted_len::TrustedLen; -use polars_error::{to_compute_err, PolarsResult}; - -use super::ColumnPageStatistics; -use 
crate::parquet::indexes::PageIndex; - -pub fn deserialize( - indexes: &[PageIndex>], - data_type: &ArrowDataType, -) -> PolarsResult { - Ok(ColumnPageStatistics { - min: deserialize_binary_iter(indexes.iter().map(|index| index.min.as_ref()), data_type)?, - max: deserialize_binary_iter(indexes.iter().map(|index| index.max.as_ref()), data_type)?, - null_count: PrimitiveArray::from_trusted_len_iter( - indexes - .iter() - .map(|index| index.null_count.map(|x| x as u64)), - ), - }) -} - -fn deserialize_binary_iter<'a, I: TrustedLen>>>( - iter: I, - data_type: &ArrowDataType, -) -> PolarsResult> { - match data_type.to_physical_type() { - PhysicalType::LargeBinary => Ok(Box::new(BinaryArray::::from_iter(iter))), - PhysicalType::Utf8 => { - let iter = iter.map(|x| x.map(|x| std::str::from_utf8(x)).transpose()); - Ok(Box::new( - Utf8Array::::try_from_trusted_len_iter(iter).map_err(to_compute_err)?, - )) - }, - PhysicalType::LargeUtf8 => { - let iter = iter.map(|x| x.map(|x| std::str::from_utf8(x)).transpose()); - Ok(Box::new( - Utf8Array::::try_from_trusted_len_iter(iter).map_err(to_compute_err)?, - )) - }, - _ => Ok(Box::new(BinaryArray::::from_iter(iter))), - } -} diff --git a/crates/polars-parquet/src/arrow/read/indexes/boolean.rs b/crates/polars-parquet/src/arrow/read/indexes/boolean.rs deleted file mode 100644 index b6414e24a621..000000000000 --- a/crates/polars-parquet/src/arrow/read/indexes/boolean.rs +++ /dev/null @@ -1,20 +0,0 @@ -use arrow::array::{BooleanArray, PrimitiveArray}; - -use super::ColumnPageStatistics; -use crate::parquet::indexes::PageIndex; - -pub fn deserialize(indexes: &[PageIndex]) -> ColumnPageStatistics { - ColumnPageStatistics { - min: Box::new(BooleanArray::from_trusted_len_iter( - indexes.iter().map(|index| index.min), - )), - max: Box::new(BooleanArray::from_trusted_len_iter( - indexes.iter().map(|index| index.max), - )), - null_count: PrimitiveArray::from_trusted_len_iter( - indexes - .iter() - .map(|index| index.null_count.map(|x| x as u64)), - ), - } -} diff --git a/crates/polars-parquet/src/arrow/read/indexes/fixed_len_binary.rs b/crates/polars-parquet/src/arrow/read/indexes/fixed_len_binary.rs deleted file mode 100644 index 5b2785b22b06..000000000000 --- a/crates/polars-parquet/src/arrow/read/indexes/fixed_len_binary.rs +++ /dev/null @@ -1,70 +0,0 @@ -use arrow::array::{Array, FixedSizeBinaryArray, MutableFixedSizeBinaryArray, PrimitiveArray}; -use arrow::datatypes::{ArrowDataType, PhysicalType, PrimitiveType}; -use arrow::trusted_len::TrustedLen; -use arrow::types::{i256, NativeType}; - -use super::ColumnPageStatistics; -use crate::parquet::indexes::PageIndex; - -pub fn deserialize( - indexes: &[PageIndex>], - data_type: ArrowDataType, -) -> ColumnPageStatistics { - ColumnPageStatistics { - min: deserialize_binary_iter( - indexes.iter().map(|index| index.min.as_ref()), - data_type.clone(), - ), - max: deserialize_binary_iter(indexes.iter().map(|index| index.max.as_ref()), data_type), - null_count: PrimitiveArray::from_trusted_len_iter( - indexes - .iter() - .map(|index| index.null_count.map(|x| x as u64)), - ), - } -} - -fn deserialize_binary_iter<'a, I: TrustedLen>>>( - iter: I, - data_type: ArrowDataType, -) -> Box { - match data_type.to_physical_type() { - PhysicalType::Primitive(PrimitiveType::Int128) => { - Box::new(PrimitiveArray::from_trusted_len_iter(iter.map(|v| { - v.map(|x| { - // Copy the fixed-size byte value to the start of a 16 byte stack - // allocated buffer, then use an arithmetic right shift to fill in - // MSBs, which accounts for leading 
1's in negative (two's complement) - // values. - let n = x.len(); - let mut bytes = [0u8; 16]; - bytes[..n].copy_from_slice(x); - i128::from_be_bytes(bytes) >> (8 * (16 - n)) - }) - }))) - }, - PhysicalType::Primitive(PrimitiveType::Int256) => { - Box::new(PrimitiveArray::from_trusted_len_iter(iter.map(|v| { - v.map(|x| { - let n = x.len(); - let mut bytes = [0u8; 32]; - bytes[..n].copy_from_slice(x); - i256::from_be_bytes(bytes) - }) - }))) - }, - _ => { - let mut a = MutableFixedSizeBinaryArray::try_new( - data_type, - Vec::with_capacity(iter.size_hint().0), - None, - ) - .unwrap(); - for item in iter { - a.push(item); - } - let a: FixedSizeBinaryArray = a.into(); - Box::new(a) - }, - } -} diff --git a/crates/polars-parquet/src/arrow/read/indexes/mod.rs b/crates/polars-parquet/src/arrow/read/indexes/mod.rs deleted file mode 100644 index 9cf465c64206..000000000000 --- a/crates/polars-parquet/src/arrow/read/indexes/mod.rs +++ /dev/null @@ -1,377 +0,0 @@ -//! API to perform page-level filtering (also known as indexes) -use crate::parquet::error::ParquetError; -use crate::parquet::indexes::{ - select_pages, BooleanIndex, ByteIndex, FixedLenByteIndex, Index as ParquetIndex, NativeIndex, - PageLocation, -}; -use crate::parquet::metadata::{ColumnChunkMetaData, RowGroupMetaData}; -use crate::parquet::read::{read_columns_indexes as _read_columns_indexes, read_pages_locations}; -use crate::parquet::schema::types::PhysicalType as ParquetPhysicalType; - -mod binary; -mod boolean; -mod fixed_len_binary; -mod primitive; - -use std::collections::VecDeque; -use std::io::{Read, Seek}; - -use arrow::array::{Array, UInt64Array}; -use arrow::datatypes::{ArrowDataType, Field, PhysicalType, PrimitiveType}; -use polars_error::{polars_bail, PolarsResult}; - -use super::get_field_pages; -pub use crate::parquet::indexes::{FilteredPage, Interval}; - -/// Page statistics of an Arrow field. -#[derive(Debug, PartialEq)] -pub enum FieldPageStatistics { - /// Variant used for fields with a single parquet column (e.g. primitives, dictionaries, list) - Single(ColumnPageStatistics), - /// Variant used for fields with multiple parquet columns (e.g. Struct, Map) - Multiple(Vec), -} - -impl From for FieldPageStatistics { - fn from(column: ColumnPageStatistics) -> Self { - Self::Single(column) - } -} - -/// [`ColumnPageStatistics`] contains the minimum, maximum, and null_count -/// of each page of a parquet column, as an [`Array`]. -/// This struct has the following invariants: -/// * `min`, `max` and `null_count` have the same length (equal to the number of pages in the column) -/// * `min`, `max` and `null_count` are guaranteed to be non-null -/// * `min` and `max` have the same logical type -#[derive(Debug, PartialEq)] -pub struct ColumnPageStatistics { - /// The minimum values in the pages - pub min: Box, - /// The maximum values in the pages - pub max: Box, - /// The number of null values in the pages. - pub null_count: UInt64Array, -} - -/// Given a sequence of [`ParquetIndex`] representing the page indexes of each column in the -/// parquet file, returns the page-level statistics as a [`FieldPageStatistics`]. -/// -/// This function maps timestamps, decimal types, etc. accordingly. -/// # Implementation -/// This function is CPU-bounded `O(P)` where `P` is the total number of pages on all columns. -/// # Error -/// This function errors iff the value is not deserializable to arrow (e.g. 
invalid utf-8) -fn deserialize( - indexes: &mut VecDeque<&dyn ParquetIndex>, - data_type: ArrowDataType, -) -> PolarsResult { - match data_type.to_physical_type() { - PhysicalType::Boolean => { - let index = indexes - .pop_front() - .unwrap() - .as_any() - .downcast_ref::() - .unwrap(); - Ok(boolean::deserialize(&index.indexes).into()) - }, - PhysicalType::Primitive(PrimitiveType::Int128) => { - let index = indexes.pop_front().unwrap(); - match index.physical_type() { - ParquetPhysicalType::Int32 => { - let index = index.as_any().downcast_ref::>().unwrap(); - Ok(primitive::deserialize_i32(&index.indexes, data_type).into()) - }, - crate::parquet::schema::types::PhysicalType::Int64 => { - let index = index.as_any().downcast_ref::>().unwrap(); - Ok( - primitive::deserialize_i64( - &index.indexes, - &index.primitive_type, - data_type, - ) - .into(), - ) - }, - crate::parquet::schema::types::PhysicalType::FixedLenByteArray(_) => { - let index = index.as_any().downcast_ref::().unwrap(); - Ok(fixed_len_binary::deserialize(&index.indexes, data_type).into()) - }, - other => polars_bail!(nyi = "Deserialize {other:?} to arrow's int64"), - } - }, - PhysicalType::Primitive(PrimitiveType::Int256) => { - let index = indexes.pop_front().unwrap(); - match index.physical_type() { - ParquetPhysicalType::Int32 => { - let index = index.as_any().downcast_ref::>().unwrap(); - Ok(primitive::deserialize_i32(&index.indexes, data_type).into()) - }, - crate::parquet::schema::types::PhysicalType::Int64 => { - let index = index.as_any().downcast_ref::>().unwrap(); - Ok( - primitive::deserialize_i64( - &index.indexes, - &index.primitive_type, - data_type, - ) - .into(), - ) - }, - crate::parquet::schema::types::PhysicalType::FixedLenByteArray(_) => { - let index = index.as_any().downcast_ref::().unwrap(); - Ok(fixed_len_binary::deserialize(&index.indexes, data_type).into()) - }, - other => polars_bail!(nyi = "Deserialize {other:?} to arrow's int64"), - } - }, - PhysicalType::Primitive(PrimitiveType::UInt8) - | PhysicalType::Primitive(PrimitiveType::UInt16) - | PhysicalType::Primitive(PrimitiveType::UInt32) - | PhysicalType::Primitive(PrimitiveType::Int32) => { - let index = indexes - .pop_front() - .unwrap() - .as_any() - .downcast_ref::>() - .unwrap(); - Ok(primitive::deserialize_i32(&index.indexes, data_type).into()) - }, - PhysicalType::Primitive(PrimitiveType::UInt64) - | PhysicalType::Primitive(PrimitiveType::Int64) => { - let index = indexes.pop_front().unwrap(); - match index.physical_type() { - ParquetPhysicalType::Int64 => { - let index = index.as_any().downcast_ref::>().unwrap(); - Ok( - primitive::deserialize_i64( - &index.indexes, - &index.primitive_type, - data_type, - ) - .into(), - ) - }, - crate::parquet::schema::types::PhysicalType::Int96 => { - let index = index - .as_any() - .downcast_ref::>() - .unwrap(); - Ok(primitive::deserialize_i96(&index.indexes, data_type).into()) - }, - other => polars_bail!(nyi = "Deserialize {other:?} to arrow's int64"), - } - }, - PhysicalType::Primitive(PrimitiveType::Float32) => { - let index = indexes - .pop_front() - .unwrap() - .as_any() - .downcast_ref::>() - .unwrap(); - Ok(primitive::deserialize_id(&index.indexes, data_type).into()) - }, - PhysicalType::Primitive(PrimitiveType::Float64) => { - let index = indexes - .pop_front() - .unwrap() - .as_any() - .downcast_ref::>() - .unwrap(); - Ok(primitive::deserialize_id(&index.indexes, data_type).into()) - }, - PhysicalType::Binary - | PhysicalType::LargeBinary - | PhysicalType::Utf8 - | PhysicalType::LargeUtf8 - | 
PhysicalType::Utf8View - | PhysicalType::BinaryView => { - let index = indexes - .pop_front() - .unwrap() - .as_any() - .downcast_ref::() - .unwrap(); - binary::deserialize(&index.indexes, &data_type).map(|x| x.into()) - }, - PhysicalType::FixedSizeBinary => { - let index = indexes - .pop_front() - .unwrap() - .as_any() - .downcast_ref::() - .unwrap(); - Ok(fixed_len_binary::deserialize(&index.indexes, data_type).into()) - }, - PhysicalType::Dictionary(_) => { - if let ArrowDataType::Dictionary(_, inner, _) = data_type.to_logical_type() { - deserialize(indexes, (**inner).clone()) - } else { - unreachable!() - } - }, - PhysicalType::List => { - if let ArrowDataType::List(inner) = data_type.to_logical_type() { - deserialize(indexes, inner.data_type.clone()) - } else { - unreachable!() - } - }, - PhysicalType::LargeList => { - if let ArrowDataType::LargeList(inner) = data_type.to_logical_type() { - deserialize(indexes, inner.data_type.clone()) - } else { - unreachable!() - } - }, - PhysicalType::Map => { - if let ArrowDataType::Map(inner, _) = data_type.to_logical_type() { - deserialize(indexes, inner.data_type.clone()) - } else { - unreachable!() - } - }, - PhysicalType::Struct => { - let children_fields = - if let ArrowDataType::Struct(children) = data_type.to_logical_type() { - children - } else { - unreachable!() - }; - let children = children_fields - .iter() - .map(|child| deserialize(indexes, child.data_type.clone())) - .collect::>>()?; - - Ok(FieldPageStatistics::Multiple(children)) - }, - - other => polars_bail!(nyi = "Deserialize into arrow's {other:?} page index"), - } -} - -/// Checks whether the row group have page index information (page statistics) -pub fn has_indexes(row_group: &RowGroupMetaData) -> bool { - row_group - .columns() - .iter() - .all(|chunk| chunk.column_chunk().column_index_offset.is_some()) -} - -/// Reads the column indexes from the reader assuming a valid set of derived Arrow fields -/// for all parquet the columns in the file. -/// -/// It returns one [`FieldPageStatistics`] per field in `fields` -/// -/// This function is expected to be used to filter out parquet pages. -/// -/// # Implementation -/// This function is IO-bounded and calls `reader.read_exact` exactly once. -/// # Error -/// Errors iff the indexes can't be read or their deserialization to arrow is incorrect (e.g. invalid utf-8) -pub fn read_columns_indexes( - reader: &mut R, - chunks: &[ColumnChunkMetaData], - fields: &[Field], -) -> PolarsResult> { - let indexes = _read_columns_indexes(reader, chunks)?; - - fields - .iter() - .map(|field| { - let indexes = get_field_pages(chunks, &indexes, &field.name); - let mut indexes = indexes.into_iter().map(|boxed| boxed.as_ref()).collect(); - - deserialize(&mut indexes, field.data_type.clone()) - }) - .collect() -} - -/// Returns the set of (row) intervals of the pages. 
-pub fn compute_page_row_intervals( - locations: &[PageLocation], - num_rows: usize, -) -> Result, ParquetError> { - if locations.is_empty() { - return Ok(vec![]); - }; - - let last = (|| { - let start: usize = locations.last().unwrap().first_row_index.try_into()?; - let length = num_rows - start; - Result::<_, ParquetError>::Ok(Interval::new(start, length)) - })(); - - let pages_lengths = locations - .windows(2) - .map(|x| { - let start = usize::try_from(x[0].first_row_index)?; - let length = usize::try_from(x[1].first_row_index - x[0].first_row_index)?; - Ok(Interval::new(start, length)) - }) - .chain(std::iter::once(last)); - pages_lengths.collect() -} - -/// Reads all page locations and index locations (IO-bounded) and uses `predicate` to compute -/// the set of [`FilteredPage`] that fulfill the predicate. -/// -/// The non-trivial argument of this function is `predicate`, that controls which pages are selected. -/// Its signature contains 2 arguments: -/// * 0th argument (indexes): contains one [`ColumnPageStatistics`] (page statistics) per field. -/// Use it to evaluate the predicate against -/// * 1th argument (intervals): contains one [`Vec>`] (row positions) per field. -/// For each field, the outermost vector corresponds to each parquet column: -/// a primitive field contains 1 column, a struct field with 2 primitive fields contain 2 columns. -/// The inner `Vec` contains one [`Interval`] per page: its length equals the length of [`ColumnPageStatistics`]. -/// -/// It returns a single [`Vec`] denoting the set of intervals that the predicate selects (over all columns). -/// -/// This returns one item per `field`. For each field, there is one item per column (for non-nested types it returns one column) -/// and finally [`Vec`], that corresponds to the set of selected pages. 
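As a concrete illustration of the interval computation above (and of the per-page row intervals that the `read_filtered_pages` predicate receives), here is a minimal, self-contained sketch. `Span`, `page_row_spans` and the plain `usize` row indexes are hypothetical stand-ins for the crate's `Interval` and `PageLocation` types, not the actual API:

#[derive(Debug, PartialEq)]
struct Span {
    start: usize,
    length: usize,
}

fn page_row_spans(first_row_indexes: &[usize], num_rows: usize) -> Vec<Span> {
    if first_row_indexes.is_empty() {
        return vec![];
    }
    // Each page spans from its first row to the next page's first row...
    let mut spans: Vec<Span> = first_row_indexes
        .windows(2)
        .map(|w| Span { start: w[0], length: w[1] - w[0] })
        .collect();
    // ...and the last page extends to the end of the row group.
    let last_start = *first_row_indexes.last().unwrap();
    spans.push(Span { start: last_start, length: num_rows - last_start });
    spans
}

fn main() {
    // Three pages whose first rows are 0, 5 and 10, in a 100-row row group.
    let spans = page_row_spans(&[0, 5, 10], 100);
    assert_eq!(spans[0], Span { start: 0, length: 5 });
    assert_eq!(spans[1], Span { start: 5, length: 5 });
    assert_eq!(spans[2], Span { start: 10, length: 90 });
}

This mirrors the `windows(2)` plus final-interval logic in `compute_page_row_intervals` above, but with fallible `try_into` conversions and error handling omitted.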
-pub fn read_filtered_pages< - R: Read + Seek, - F: Fn(&[FieldPageStatistics], &[Vec>]) -> Vec, ->( - reader: &mut R, - row_group: &RowGroupMetaData, - fields: &[Field], - predicate: F, - //is_intersection: bool, -) -> PolarsResult>>> { - let num_rows = row_group.num_rows(); - - // one vec per column - let locations = read_pages_locations(reader, row_group.columns())?; - // one Vec> per field (non-nested contain a single entry on the first column) - let locations = fields - .iter() - .map(|field| get_field_pages(row_group.columns(), &locations, &field.name)) - .collect::>(); - - // one ColumnPageStatistics per field - let indexes = read_columns_indexes(reader, row_group.columns(), fields)?; - - let intervals = locations - .iter() - .map(|locations| { - locations - .iter() - .map(|locations| Ok(compute_page_row_intervals(locations, num_rows)?)) - .collect::>>() - }) - .collect::>>()?; - - let intervals = predicate(&indexes, &intervals); - - locations - .into_iter() - .map(|locations| { - locations - .into_iter() - .map(|locations| Ok(select_pages(&intervals, locations, num_rows)?)) - .collect::>>() - }) - .collect() -} diff --git a/crates/polars-parquet/src/arrow/read/indexes/primitive.rs b/crates/polars-parquet/src/arrow/read/indexes/primitive.rs deleted file mode 100644 index dfd72bc9c54e..000000000000 --- a/crates/polars-parquet/src/arrow/read/indexes/primitive.rs +++ /dev/null @@ -1,227 +0,0 @@ -use arrow::array::{Array, MutablePrimitiveArray, PrimitiveArray}; -use arrow::datatypes::{ArrowDataType, TimeUnit}; -use arrow::trusted_len::TrustedLen; -use arrow::types::{i256, NativeType}; -use ethnum::I256; - -use super::ColumnPageStatistics; -use crate::parquet::indexes::PageIndex; -use crate::parquet::schema::types::{ - PrimitiveLogicalType, PrimitiveType, TimeUnit as ParquetTimeUnit, -}; -use crate::parquet::types::int96_to_i64_ns; - -#[inline] -fn deserialize_int32>>( - iter: I, - data_type: ArrowDataType, -) -> Box { - use ArrowDataType::*; - match data_type.to_logical_type() { - UInt8 => Box::new( - PrimitiveArray::::from_trusted_len_iter(iter.map(|x| x.map(|x| x as u8))) - .to(data_type), - ) as _, - UInt16 => Box::new( - PrimitiveArray::::from_trusted_len_iter(iter.map(|x| x.map(|x| x as u16))) - .to(data_type), - ), - UInt32 => Box::new( - PrimitiveArray::::from_trusted_len_iter(iter.map(|x| x.map(|x| x as u32))) - .to(data_type), - ), - Decimal(_, _) => Box::new( - PrimitiveArray::::from_trusted_len_iter(iter.map(|x| x.map(|x| x as i128))) - .to(data_type), - ), - Decimal256(_, _) => Box::new( - PrimitiveArray::::from_trusted_len_iter( - iter.map(|x| x.map(|x| i256(I256::new(x.into())))), - ) - .to(data_type), - ) as _, - _ => Box::new(PrimitiveArray::::from_trusted_len_iter(iter).to(data_type)), - } -} - -#[inline] -fn timestamp( - array: &mut MutablePrimitiveArray, - time_unit: TimeUnit, - logical_type: Option, -) { - let unit = if let Some(PrimitiveLogicalType::Timestamp { unit, .. 
}) = logical_type { - unit - } else { - return; - }; - - match (unit, time_unit) { - (ParquetTimeUnit::Milliseconds, TimeUnit::Second) => array - .values_mut_slice() - .iter_mut() - .for_each(|x| *x /= 1_000), - (ParquetTimeUnit::Microseconds, TimeUnit::Second) => array - .values_mut_slice() - .iter_mut() - .for_each(|x| *x /= 1_000_000), - (ParquetTimeUnit::Nanoseconds, TimeUnit::Second) => array - .values_mut_slice() - .iter_mut() - .for_each(|x| *x /= 1_000_000_000), - - (ParquetTimeUnit::Milliseconds, TimeUnit::Millisecond) => {}, - (ParquetTimeUnit::Microseconds, TimeUnit::Millisecond) => array - .values_mut_slice() - .iter_mut() - .for_each(|x| *x /= 1_000), - (ParquetTimeUnit::Nanoseconds, TimeUnit::Millisecond) => array - .values_mut_slice() - .iter_mut() - .for_each(|x| *x /= 1_000_000), - - (ParquetTimeUnit::Milliseconds, TimeUnit::Microsecond) => array - .values_mut_slice() - .iter_mut() - .for_each(|x| *x *= 1_000), - (ParquetTimeUnit::Microseconds, TimeUnit::Microsecond) => {}, - (ParquetTimeUnit::Nanoseconds, TimeUnit::Microsecond) => array - .values_mut_slice() - .iter_mut() - .for_each(|x| *x /= 1_000), - - (ParquetTimeUnit::Milliseconds, TimeUnit::Nanosecond) => array - .values_mut_slice() - .iter_mut() - .for_each(|x| *x *= 1_000_000), - (ParquetTimeUnit::Microseconds, TimeUnit::Nanosecond) => array - .values_mut_slice() - .iter_mut() - .for_each(|x| *x /= 1_000), - (ParquetTimeUnit::Nanoseconds, TimeUnit::Nanosecond) => {}, - } -} - -#[inline] -fn deserialize_int64>>( - iter: I, - primitive_type: &PrimitiveType, - data_type: ArrowDataType, -) -> Box { - use ArrowDataType::*; - match data_type.to_logical_type() { - UInt64 => Box::new( - PrimitiveArray::::from_trusted_len_iter(iter.map(|x| x.map(|x| x as u64))) - .to(data_type), - ) as _, - Decimal(_, _) => Box::new( - PrimitiveArray::::from_trusted_len_iter(iter.map(|x| x.map(|x| x as i128))) - .to(data_type), - ) as _, - Decimal256(_, _) => Box::new( - PrimitiveArray::::from_trusted_len_iter( - iter.map(|x| x.map(|x| i256(I256::new(x.into())))), - ) - .to(data_type), - ) as _, - Timestamp(time_unit, _) => { - let mut array = - MutablePrimitiveArray::::from_trusted_len_iter(iter).to(data_type.clone()); - - timestamp(&mut array, *time_unit, primitive_type.logical_type); - - let array: PrimitiveArray = array.into(); - - Box::new(array) - }, - _ => Box::new(PrimitiveArray::::from_trusted_len_iter(iter).to(data_type)), - } -} - -#[inline] -fn deserialize_int96>>( - iter: I, - data_type: ArrowDataType, -) -> Box { - Box::new( - PrimitiveArray::::from_trusted_len_iter(iter.map(|x| x.map(int96_to_i64_ns))) - .to(data_type), - ) -} - -#[inline] -fn deserialize_id_s>>( - iter: I, - data_type: ArrowDataType, -) -> Box { - Box::new(PrimitiveArray::::from_trusted_len_iter(iter).to(data_type)) -} - -pub fn deserialize_i32( - indexes: &[PageIndex], - data_type: ArrowDataType, -) -> ColumnPageStatistics { - ColumnPageStatistics { - min: deserialize_int32(indexes.iter().map(|index| index.min), data_type.clone()), - max: deserialize_int32(indexes.iter().map(|index| index.max), data_type), - null_count: PrimitiveArray::from_trusted_len_iter( - indexes - .iter() - .map(|index| index.null_count.map(|x| x as u64)), - ), - } -} - -pub fn deserialize_i64( - indexes: &[PageIndex], - primitive_type: &PrimitiveType, - data_type: ArrowDataType, -) -> ColumnPageStatistics { - ColumnPageStatistics { - min: deserialize_int64( - indexes.iter().map(|index| index.min), - primitive_type, - data_type.clone(), - ), - max: deserialize_int64( - 
indexes.iter().map(|index| index.max), - primitive_type, - data_type, - ), - null_count: PrimitiveArray::from_trusted_len_iter( - indexes - .iter() - .map(|index| index.null_count.map(|x| x as u64)), - ), - } -} - -pub fn deserialize_i96( - indexes: &[PageIndex<[u32; 3]>], - data_type: ArrowDataType, -) -> ColumnPageStatistics { - ColumnPageStatistics { - min: deserialize_int96(indexes.iter().map(|index| index.min), data_type.clone()), - max: deserialize_int96(indexes.iter().map(|index| index.max), data_type), - null_count: PrimitiveArray::from_trusted_len_iter( - indexes - .iter() - .map(|index| index.null_count.map(|x| x as u64)), - ), - } -} - -pub fn deserialize_id( - indexes: &[PageIndex], - data_type: ArrowDataType, -) -> ColumnPageStatistics { - ColumnPageStatistics { - min: deserialize_id_s(indexes.iter().map(|index| index.min), data_type.clone()), - max: deserialize_id_s(indexes.iter().map(|index| index.max), data_type), - null_count: PrimitiveArray::from_trusted_len_iter( - indexes - .iter() - .map(|index| index.null_count.map(|x| x as u64)), - ), - } -} diff --git a/crates/polars-parquet/src/arrow/read/mod.rs b/crates/polars-parquet/src/arrow/read/mod.rs index b7fe702e8107..9c445d7a46ce 100644 --- a/crates/polars-parquet/src/arrow/read/mod.rs +++ b/crates/polars-parquet/src/arrow/read/mod.rs @@ -2,7 +2,6 @@ #![allow(clippy::type_complexity)] mod deserialize; -pub mod indexes; pub mod schema; pub mod statistics; @@ -28,8 +27,7 @@ pub use crate::parquet::{ metadata::{ColumnChunkMetaData, ColumnDescriptor, RowGroupMetaData}, page::{CompressedDataPage, DataPageHeader, Page}, read::{ - decompress, get_column_iterator, read_columns_indexes as _read_columns_indexes, - read_metadata as _read_metadata, read_pages_locations, BasicDecompressor, + decompress, get_column_iterator, read_metadata as _read_metadata, BasicDecompressor, MutStreamingIterator, PageReader, ReadColumnIterator, State, }, schema::types::{ @@ -40,18 +38,6 @@ pub use crate::parquet::{ FallibleStreamingIterator, }; -/// Returns all [`ColumnChunkMetaData`] associated to `field_name`. -/// For non-nested parquet types, this returns a single column -pub fn get_field_columns<'a>( - columns: &'a [ColumnChunkMetaData], - field_name: &str, -) -> Vec<&'a ColumnChunkMetaData> { - columns - .iter() - .filter(|x| x.descriptor().path_in_schema[0] == field_name) - .collect() -} - /// Returns all [`ColumnChunkMetaData`] associated to `field_name`. /// For non-nested parquet types, this returns a single column pub fn get_field_pages<'a, T>( diff --git a/crates/polars-parquet/src/arrow/read/schema/mod.rs b/crates/polars-parquet/src/arrow/read/schema/mod.rs index a52498557d9e..34fb195a4eaa 100644 --- a/crates/polars-parquet/src/arrow/read/schema/mod.rs +++ b/crates/polars-parquet/src/arrow/read/schema/mod.rs @@ -33,9 +33,11 @@ impl Default for SchemaInferenceOptions { } } -/// Infers a [`ArrowSchema`] from parquet's [`FileMetaData`]. This first looks for the metadata key -/// `"ARROW:schema"`; if it does not exist, it converts the parquet types declared in the -/// file's parquet schema to Arrow's equivalent. +/// Infers a [`ArrowSchema`] from parquet's [`FileMetaData`]. +/// +/// This first looks for the metadata key `"ARROW:schema"`; if it does not exist, it converts the +/// Parquet types declared in the file's Parquet schema to Arrow's equivalent. +/// /// # Error /// This function errors iff the key `"ARROW:schema"` exists but is not correctly encoded, /// indicating that that the file's arrow metadata was incorrectly written. 
diff --git a/crates/polars-parquet/src/arrow/read/statistics/mod.rs b/crates/polars-parquet/src/arrow/read/statistics/mod.rs index ea3b34fb8631..0face3c8b358 100644 --- a/crates/polars-parquet/src/arrow/read/statistics/mod.rs +++ b/crates/polars-parquet/src/arrow/read/statistics/mod.rs @@ -8,12 +8,12 @@ use arrow::with_match_primitive_type_full; use ethnum::I256; use polars_error::{polars_bail, PolarsResult}; -use crate::parquet::metadata::RowGroupMetaData; use crate::parquet::schema::types::{ PhysicalType as ParquetPhysicalType, PrimitiveType as ParquetPrimitiveType, }; use crate::parquet::statistics::{PrimitiveStatistics, Statistics as ParquetStatistics}; use crate::parquet::types::int96_to_i64_ns; +use crate::read::ColumnChunkMetaData; mod binary; mod binview; @@ -28,7 +28,6 @@ mod struct_; mod utf8; use self::list::DynMutableListArray; -use super::get_field_columns; /// Arrow-deserialized parquet Statistics of a file #[derive(Debug, PartialEq)] @@ -543,12 +542,11 @@ fn push( /// /// # Errors /// This function errors if the deserialization of the statistics fails (e.g. invalid utf8) -pub fn deserialize(field: &Field, row_group: &RowGroupMetaData) -> PolarsResult { +pub fn deserialize(field: &Field, field_md: &[&ColumnChunkMetaData]) -> PolarsResult { let mut statistics = MutableStatistics::try_new(field)?; - let columns = get_field_columns(row_group.columns(), field.name.as_ref()); - let mut stats = columns - .into_iter() + let mut stats = field_md + .iter() .map(|column| { Ok(( column.statistics().transpose()?, diff --git a/crates/polars-parquet/src/arrow/write/binary/basic.rs b/crates/polars-parquet/src/arrow/write/binary/basic.rs index c977a4e4939c..0e7cbdfb37b1 100644 --- a/crates/polars-parquet/src/arrow/write/binary/basic.rs +++ b/crates/polars-parquet/src/arrow/write/binary/basic.rs @@ -9,7 +9,7 @@ use crate::parquet::encoding::{delta_bitpacked, Encoding}; use crate::parquet::schema::types::PrimitiveType; use crate::parquet::statistics::{BinaryStatistics, ParquetStatistics}; use crate::write::utils::invalid_encoding; -use crate::write::{Page, StatisticsOptions}; +use crate::write::{EncodeNullability, Page, StatisticsOptions}; pub(crate) fn encode_non_null_values<'a, I: Iterator>( iter: I, @@ -23,14 +23,27 @@ pub(crate) fn encode_non_null_values<'a, I: Iterator>( }) } -pub(crate) fn encode_plain(array: &BinaryArray, buffer: &mut Vec) { - let len_before = buffer.len(); - let capacity = - array.get_values_size() + (array.len() - array.null_count()) * std::mem::size_of::(); - buffer.reserve(capacity); - encode_non_null_values(array.non_null_values_iter(), buffer); - // Ensure we allocated properly. - debug_assert_eq!(buffer.len() - len_before, capacity); +pub(crate) fn encode_plain( + array: &BinaryArray, + options: EncodeNullability, + buffer: &mut Vec, +) { + if options.is_optional() && array.validity().is_some() { + let len_before = buffer.len(); + let capacity = array.get_values_size() + + (array.len() - array.null_count()) * std::mem::size_of::(); + buffer.reserve(capacity); + encode_non_null_values(array.non_null_values_iter(), buffer); + // Ensure we allocated properly. + debug_assert_eq!(buffer.len() - len_before, capacity); + } else { + let len_before = buffer.len(); + let capacity = array.get_values_size() + array.len() * std::mem::size_of::(); + buffer.reserve(capacity); + encode_non_null_values(array.values_iter(), buffer); + // Ensure we allocated properly. 
+ debug_assert_eq!(buffer.len() - len_before, capacity); + } } pub fn array_to_page( @@ -41,6 +54,7 @@ pub fn array_to_page( ) -> PolarsResult { let validity = array.validity(); let is_optional = is_nullable(&type_.field_info); + let encode_options = EncodeNullability::new(is_optional); let mut buffer = vec![]; utils::write_def_levels( @@ -54,12 +68,12 @@ pub fn array_to_page( let definition_levels_byte_length = buffer.len(); match encoding { - Encoding::Plain => encode_plain(array, &mut buffer), + Encoding::Plain => encode_plain(array, encode_options, &mut buffer), Encoding::DeltaLengthByteArray => encode_delta( array.values(), array.offsets().buffer(), array.validity(), - is_optional, + encode_options, &mut buffer, ), _ => return Err(invalid_encoding(encoding, array.data_type())), @@ -113,10 +127,10 @@ pub(crate) fn encode_delta( values: &[u8], offsets: &[O], validity: Option<&Bitmap>, - is_optional: bool, + options: EncodeNullability, buffer: &mut Vec, ) { - if is_optional { + if options.is_optional() && validity.is_some() { if let Some(validity) = validity { let lengths = offsets .windows(2) diff --git a/crates/polars-parquet/src/arrow/write/binary/nested.rs b/crates/polars-parquet/src/arrow/write/binary/nested.rs index afc487f42333..afb87200da53 100644 --- a/crates/polars-parquet/src/arrow/write/binary/nested.rs +++ b/crates/polars-parquet/src/arrow/write/binary/nested.rs @@ -8,6 +8,8 @@ use crate::arrow::write::Nested; use crate::parquet::encoding::Encoding; use crate::parquet::page::DataPage; use crate::parquet::schema::types::PrimitiveType; +use crate::read::schema::is_nullable; +use crate::write::EncodeNullability; pub fn array_to_page( array: &BinaryArray, @@ -18,11 +20,14 @@ pub fn array_to_page( where O: Offset, { + let is_optional = is_nullable(&type_.field_info); + let encode_options = EncodeNullability::new(is_optional); + let mut buffer = vec![]; let (repetition_levels_byte_length, definition_levels_byte_length) = nested::write_rep_and_def(options.version, nested, &mut buffer)?; - encode_plain(array, &mut buffer); + encode_plain(array, encode_options, &mut buffer); let statistics = if options.has_statistics() { Some(build_statistics(array, type_.clone(), &options.statistics)) diff --git a/crates/polars-parquet/src/arrow/write/binview/basic.rs b/crates/polars-parquet/src/arrow/write/binview/basic.rs index c7059b63c99e..251a336b2177 100644 --- a/crates/polars-parquet/src/arrow/write/binview/basic.rs +++ b/crates/polars-parquet/src/arrow/write/binview/basic.rs @@ -8,29 +8,56 @@ use crate::parquet::statistics::{BinaryStatistics, ParquetStatistics}; use crate::read::schema::is_nullable; use crate::write::binary::encode_non_null_values; use crate::write::utils::invalid_encoding; -use crate::write::{utils, Encoding, Page, StatisticsOptions, WriteOptions}; +use crate::write::{utils, EncodeNullability, Encoding, Page, StatisticsOptions, WriteOptions}; -pub(crate) fn encode_plain(array: &BinaryViewArray, buffer: &mut Vec) { - let capacity = - array.total_bytes_len() + (array.len() - array.null_count()) * std::mem::size_of::(); +pub(crate) fn encode_plain( + array: &BinaryViewArray, + options: EncodeNullability, + buffer: &mut Vec, +) { + if options.is_optional() && array.validity().is_some() { + let capacity = array.total_bytes_len() + + (array.len() - array.null_count()) * std::mem::size_of::(); + + let len_before = buffer.len(); + buffer.reserve(capacity); + + encode_non_null_values(array.non_null_values_iter(), buffer); + // Append the non-null values. 
+ debug_assert_eq!(buffer.len() - len_before, capacity); + } else { + let capacity = array.total_bytes_len() + array.len() * std::mem::size_of::(); - let len_before = buffer.len(); - buffer.reserve(capacity); + let len_before = buffer.len(); + buffer.reserve(capacity); - encode_non_null_values(array.non_null_values_iter(), buffer); - // Append the non-null values. - debug_assert_eq!(buffer.len() - len_before, capacity); + encode_non_null_values(array.values_iter(), buffer); + // Append the non-null values. + debug_assert_eq!(buffer.len() - len_before, capacity); + } } -pub(crate) fn encode_delta(array: &BinaryViewArray, buffer: &mut Vec) { - let lengths = utils::ExactSizedIter::new( - array.non_null_views_iter().map(|v| v.length as i64), - array.len() - array.null_count(), - ); - delta_bitpacked::encode(lengths, buffer, 1); +pub(crate) fn encode_delta( + array: &BinaryViewArray, + options: EncodeNullability, + buffer: &mut Vec, +) { + if options.is_optional() && array.validity().is_some() { + let lengths = utils::ExactSizedIter::new( + array.non_null_views_iter().map(|v| v.length as i64), + array.len() - array.null_count(), + ); + delta_bitpacked::encode(lengths, buffer, 1); + + for slice in array.non_null_values_iter() { + buffer.extend_from_slice(slice) + } + } else { + let lengths = + utils::ExactSizedIter::new(array.views().iter().map(|v| v.length as i64), array.len()); + delta_bitpacked::encode(lengths, buffer, 1); - for slice in array.non_null_values_iter() { - buffer.extend_from_slice(slice) + buffer.extend(array.values_iter().flatten()); } } @@ -41,6 +68,7 @@ pub fn array_to_page( encoding: Encoding, ) -> PolarsResult { let is_optional = is_nullable(&type_.field_info); + let encode_options = EncodeNullability::new(is_optional); let mut buffer = vec![]; // TODO! 
reserve capacity @@ -55,8 +83,8 @@ pub fn array_to_page( let definition_levels_byte_length = buffer.len(); match encoding { - Encoding::Plain => encode_plain(array, &mut buffer), - Encoding::DeltaLengthByteArray => encode_delta(array, &mut buffer), + Encoding::Plain => encode_plain(array, encode_options, &mut buffer), + Encoding::DeltaLengthByteArray => encode_delta(array, encode_options, &mut buffer), _ => return Err(invalid_encoding(encoding, array.data_type())), } diff --git a/crates/polars-parquet/src/arrow/write/binview/nested.rs b/crates/polars-parquet/src/arrow/write/binview/nested.rs index 9e76b23e6b19..16165a7d4299 100644 --- a/crates/polars-parquet/src/arrow/write/binview/nested.rs +++ b/crates/polars-parquet/src/arrow/write/binview/nested.rs @@ -7,6 +7,8 @@ use crate::arrow::write::Nested; use crate::parquet::encoding::Encoding; use crate::parquet::page::DataPage; use crate::parquet::schema::types::PrimitiveType; +use crate::read::schema::is_nullable; +use crate::write::EncodeNullability; pub fn array_to_page( array: &BinaryViewArray, @@ -14,11 +16,14 @@ pub fn array_to_page( type_: PrimitiveType, nested: &[Nested], ) -> PolarsResult { + let is_optional = is_nullable(&type_.field_info); + let encode_options = EncodeNullability::new(is_optional); + let mut buffer = vec![]; let (repetition_levels_byte_length, definition_levels_byte_length) = nested::write_rep_and_def(options.version, nested, &mut buffer)?; - encode_plain(array, &mut buffer); + encode_plain(array, encode_options, &mut buffer); let statistics = if options.has_statistics() { Some(build_statistics(array, type_.clone(), &options.statistics)) diff --git a/crates/polars-parquet/src/arrow/write/boolean/basic.rs b/crates/polars-parquet/src/arrow/write/boolean/basic.rs index 0735ba2f4d6c..e338ca0c3d12 100644 --- a/crates/polars-parquet/src/arrow/write/boolean/basic.rs +++ b/crates/polars-parquet/src/arrow/write/boolean/basic.rs @@ -8,7 +8,7 @@ use crate::parquet::encoding::Encoding; use crate::parquet::page::DataPage; use crate::parquet::schema::types::PrimitiveType; use crate::parquet::statistics::{BooleanStatistics, ParquetStatistics}; -use crate::write::StatisticsOptions; +use crate::write::{EncodeNullability, StatisticsOptions}; fn encode(iterator: impl Iterator, buffer: &mut Vec) -> PolarsResult<()> { // encode values using bitpacking @@ -20,10 +20,10 @@ fn encode(iterator: impl Iterator, buffer: &mut Vec) -> PolarsR pub(super) fn encode_plain( array: &BooleanArray, - is_optional: bool, + encode_options: EncodeNullability, buffer: &mut Vec, ) -> PolarsResult<()> { - if is_optional && array.validity().is_some() { + if encode_options.is_optional() && array.validity().is_some() { encode(array.non_null_values_iter(), buffer) } else { encode(array.values().iter(), buffer) @@ -32,13 +32,13 @@ pub(super) fn encode_plain( pub(super) fn encode_hybrid_rle( array: &BooleanArray, - is_optional: bool, + encode_options: EncodeNullability, buffer: &mut Vec, ) -> PolarsResult<()> { buffer.extend_from_slice(&[0; 4]); let start = buffer.len(); - if is_optional && array.validity().is_some() { + if encode_options.is_optional() && array.validity().is_some() { hybrid_rle::encode(buffer, array.non_null_values_iter(), 1)?; } else { hybrid_rle::encode(buffer, array.values().iter(), 1)?; @@ -60,6 +60,7 @@ pub fn array_to_page( encoding: Encoding, ) -> PolarsResult { let is_optional = is_nullable(&type_.field_info); + let encode_nullability = EncodeNullability::new(is_optional); let validity = array.validity(); @@ -75,8 +76,8 @@ pub fn 
array_to_page( let definition_levels_byte_length = buffer.len(); match encoding { - Encoding::Plain => encode_plain(array, is_optional, &mut buffer)?, - Encoding::Rle => encode_hybrid_rle(array, is_optional, &mut buffer)?, + Encoding::Plain => encode_plain(array, encode_nullability, &mut buffer)?, + Encoding::Rle => encode_hybrid_rle(array, encode_nullability, &mut buffer)?, other => polars_bail!(nyi = "Encoding boolean as {other:?}"), } diff --git a/crates/polars-parquet/src/arrow/write/boolean/nested.rs b/crates/polars-parquet/src/arrow/write/boolean/nested.rs index 3560bc167369..082197202588 100644 --- a/crates/polars-parquet/src/arrow/write/boolean/nested.rs +++ b/crates/polars-parquet/src/arrow/write/boolean/nested.rs @@ -1,7 +1,7 @@ use arrow::array::{Array, BooleanArray}; use polars_error::PolarsResult; -use super::super::{nested, utils, WriteOptions}; +use super::super::{nested, utils, EncodeNullability, WriteOptions}; use super::basic::{build_statistics, encode_plain}; use crate::arrow::read::schema::is_nullable; use crate::arrow::write::Nested; @@ -16,12 +16,13 @@ pub fn array_to_page( nested: &[Nested], ) -> PolarsResult { let is_optional = is_nullable(&type_.field_info); + let encode_options = EncodeNullability::new(is_optional); let mut buffer = vec![]; let (repetition_levels_byte_length, definition_levels_byte_length) = nested::write_rep_and_def(options.version, nested, &mut buffer)?; - encode_plain(array, is_optional, &mut buffer)?; + encode_plain(array, encode_options, &mut buffer)?; let statistics = if options.has_statistics() { Some(build_statistics(array, &options.statistics)) diff --git a/crates/polars-parquet/src/arrow/write/dictionary.rs b/crates/polars-parquet/src/arrow/write/dictionary.rs index 4a507557d36b..096765446f88 100644 --- a/crates/polars-parquet/src/arrow/write/dictionary.rs +++ b/crates/polars-parquet/src/arrow/write/dictionary.rs @@ -19,7 +19,7 @@ use super::pages::PrimitiveNested; use super::primitive::{ build_statistics as primitive_build_statistics, encode_plain as primitive_encode_plain, }; -use super::{binview, nested, Nested, WriteOptions}; +use super::{binview, nested, EncodeNullability, Nested, WriteOptions}; use crate::arrow::read::schema::is_nullable; use crate::arrow::write::{slice_nested_leaf, utils}; use crate::parquet::encoding::hybrid_rle::encode; @@ -313,7 +313,8 @@ macro_rules! 
dyn_prim { ($from:ty, $to:ty, $array:expr, $options:expr, $type_:expr) => {{ let values = $array.values().as_any().downcast_ref().unwrap(); - let buffer = primitive_encode_plain::<$from, $to>(values, false, vec![]); + let buffer = + primitive_encode_plain::<$from, $to>(values, EncodeNullability::new(false), vec![]); let stats: Option = if !$options.statistics.is_empty() { let mut stats = primitive_build_statistics::<$from, $to>( @@ -343,140 +344,144 @@ pub fn array_to_pages( match encoding { Encoding::PlainDictionary | Encoding::RleDictionary => { // write DictPage - let (dict_page, mut statistics): (_, Option) = - match array.values().data_type().to_logical_type() { - ArrowDataType::Int8 => dyn_prim!(i8, i32, array, options, type_), - ArrowDataType::Int16 => dyn_prim!(i16, i32, array, options, type_), - ArrowDataType::Int32 | ArrowDataType::Date32 | ArrowDataType::Time32(_) => { - dyn_prim!(i32, i32, array, options, type_) - }, - ArrowDataType::Int64 - | ArrowDataType::Date64 - | ArrowDataType::Time64(_) - | ArrowDataType::Timestamp(_, _) - | ArrowDataType::Duration(_) => dyn_prim!(i64, i64, array, options, type_), - ArrowDataType::UInt8 => dyn_prim!(u8, i32, array, options, type_), - ArrowDataType::UInt16 => dyn_prim!(u16, i32, array, options, type_), - ArrowDataType::UInt32 => dyn_prim!(u32, i32, array, options, type_), - ArrowDataType::UInt64 => dyn_prim!(u64, i64, array, options, type_), - ArrowDataType::Float32 => dyn_prim!(f32, f32, array, options, type_), - ArrowDataType::Float64 => dyn_prim!(f64, f64, array, options, type_), - ArrowDataType::LargeUtf8 => { - let array = arrow::compute::cast::cast( - array.values().as_ref(), - &ArrowDataType::LargeBinary, - Default::default(), - ) + let (dict_page, mut statistics): (_, Option) = match array + .values() + .data_type() + .to_logical_type() + { + ArrowDataType::Int8 => dyn_prim!(i8, i32, array, options, type_), + ArrowDataType::Int16 => dyn_prim!(i16, i32, array, options, type_), + ArrowDataType::Int32 | ArrowDataType::Date32 | ArrowDataType::Time32(_) => { + dyn_prim!(i32, i32, array, options, type_) + }, + ArrowDataType::Int64 + | ArrowDataType::Date64 + | ArrowDataType::Time64(_) + | ArrowDataType::Timestamp(_, _) + | ArrowDataType::Duration(_) => dyn_prim!(i64, i64, array, options, type_), + ArrowDataType::UInt8 => dyn_prim!(u8, i32, array, options, type_), + ArrowDataType::UInt16 => dyn_prim!(u16, i32, array, options, type_), + ArrowDataType::UInt32 => dyn_prim!(u32, i32, array, options, type_), + ArrowDataType::UInt64 => dyn_prim!(u64, i64, array, options, type_), + ArrowDataType::Float32 => dyn_prim!(f32, f32, array, options, type_), + ArrowDataType::Float64 => dyn_prim!(f64, f64, array, options, type_), + ArrowDataType::LargeUtf8 => { + let array = arrow::compute::cast::cast( + array.values().as_ref(), + &ArrowDataType::LargeBinary, + Default::default(), + ) + .unwrap(); + let array = array.as_any().downcast_ref().unwrap(); + + let mut buffer = vec![]; + binary_encode_plain::(array, EncodeNullability::Required, &mut buffer); + let stats = if options.has_statistics() { + Some(binary_build_statistics( + array, + type_.clone(), + &options.statistics, + )) + } else { + None + }; + ( + DictPage::new(CowBuffer::Owned(buffer), array.len(), false), + stats, + ) + }, + ArrowDataType::BinaryView => { + let array = array + .values() + .as_any() + .downcast_ref::() .unwrap(); - let array = array.as_any().downcast_ref().unwrap(); - - let mut buffer = vec![]; - binary_encode_plain::(array, &mut buffer); - let stats = if 
options.has_statistics() { - Some(binary_build_statistics( - array, - type_.clone(), - &options.statistics, - )) - } else { - None - }; - ( - DictPage::new(CowBuffer::Owned(buffer), array.len(), false), - stats, - ) - }, - ArrowDataType::BinaryView => { - let array = array - .values() - .as_any() - .downcast_ref::() - .unwrap(); - let mut buffer = vec![]; - binview::encode_plain(array, &mut buffer); - - let stats = if options.has_statistics() { - Some(binview::build_statistics( - array, - type_.clone(), - &options.statistics, - )) - } else { - None - }; - ( - DictPage::new(CowBuffer::Owned(buffer), array.len(), false), - stats, - ) - }, - ArrowDataType::Utf8View => { - let array = array - .values() - .as_any() - .downcast_ref::() - .unwrap() - .to_binview(); - let mut buffer = vec![]; - binview::encode_plain(&array, &mut buffer); - - let stats = if options.has_statistics() { - Some(binview::build_statistics( - &array, - type_.clone(), - &options.statistics, - )) - } else { - None - }; - ( - DictPage::new(CowBuffer::Owned(buffer), array.len(), false), - stats, - ) - }, - ArrowDataType::LargeBinary => { - let values = array.values().as_any().downcast_ref().unwrap(); - - let mut buffer = vec![]; - binary_encode_plain::(values, &mut buffer); - let stats = if options.has_statistics() { - Some(binary_build_statistics( - values, - type_.clone(), - &options.statistics, - )) - } else { - None - }; - ( - DictPage::new(CowBuffer::Owned(buffer), values.len(), false), - stats, - ) - }, - ArrowDataType::FixedSizeBinary(_) => { - let mut buffer = vec![]; - let array = array.values().as_any().downcast_ref().unwrap(); - fixed_binary_encode_plain(array, false, &mut buffer); - let stats = if options.has_statistics() { - let stats = fixed_binary_build_statistics( - array, - type_.clone(), - &options.statistics, - ); - Some(stats.serialize()) - } else { - None - }; - ( - DictPage::new(CowBuffer::Owned(buffer), array.len(), false), - stats, - ) - }, - other => { - polars_bail!(nyi = + let mut buffer = vec![]; + binview::encode_plain(array, EncodeNullability::Required, &mut buffer); + + let stats = if options.has_statistics() { + Some(binview::build_statistics( + array, + type_.clone(), + &options.statistics, + )) + } else { + None + }; + ( + DictPage::new(CowBuffer::Owned(buffer), array.len(), false), + stats, + ) + }, + ArrowDataType::Utf8View => { + let array = array + .values() + .as_any() + .downcast_ref::() + .unwrap() + .to_binview(); + let mut buffer = vec![]; + binview::encode_plain(&array, EncodeNullability::Required, &mut buffer); + + let stats = if options.has_statistics() { + Some(binview::build_statistics( + &array, + type_.clone(), + &options.statistics, + )) + } else { + None + }; + ( + DictPage::new(CowBuffer::Owned(buffer), array.len(), false), + stats, + ) + }, + ArrowDataType::LargeBinary => { + let values = array.values().as_any().downcast_ref().unwrap(); + + let mut buffer = vec![]; + binary_encode_plain::(values, EncodeNullability::Required, &mut buffer); + let stats = if options.has_statistics() { + Some(binary_build_statistics( + values, + type_.clone(), + &options.statistics, + )) + } else { + None + }; + ( + DictPage::new(CowBuffer::Owned(buffer), values.len(), false), + stats, + ) + }, + ArrowDataType::FixedSizeBinary(_) => { + let mut buffer = vec![]; + let array = array.values().as_any().downcast_ref().unwrap(); + fixed_binary_encode_plain(array, EncodeNullability::Required, &mut buffer); + let stats = if options.has_statistics() { + let stats = fixed_binary_build_statistics( + 
array, + type_.clone(), + &options.statistics, + ); + Some(stats.serialize()) + } else { + None + }; + ( + DictPage::new(CowBuffer::Owned(buffer), array.len(), false), + stats, + ) + }, + other => { + polars_bail!( + nyi = "Writing dictionary arrays to parquet only support data type {other:?}" - ) - }, - }; + ) + }, + }; if let Some(stats) = &mut statistics { stats.null_count = Some(array.null_count() as i64) diff --git a/crates/polars-parquet/src/arrow/write/fixed_len_bytes.rs b/crates/polars-parquet/src/arrow/write/fixed_len_bytes.rs index bf15c0ab50cc..9277b9c78a98 100644 --- a/crates/polars-parquet/src/arrow/write/fixed_len_bytes.rs +++ b/crates/polars-parquet/src/arrow/write/fixed_len_bytes.rs @@ -3,16 +3,20 @@ use arrow::types::i256; use polars_error::PolarsResult; use super::binary::ord_binary; -use super::{utils, StatisticsOptions, WriteOptions}; +use super::{utils, EncodeNullability, StatisticsOptions, WriteOptions}; use crate::arrow::read::schema::is_nullable; use crate::parquet::encoding::Encoding; use crate::parquet::page::DataPage; use crate::parquet::schema::types::PrimitiveType; use crate::parquet::statistics::FixedLenStatistics; -pub(crate) fn encode_plain(array: &FixedSizeBinaryArray, is_optional: bool, buffer: &mut Vec) { +pub(crate) fn encode_plain( + array: &FixedSizeBinaryArray, + options: EncodeNullability, + buffer: &mut Vec, +) { // append the non-null values - if is_optional { + if options.is_optional() && array.validity().is_some() { array.iter().for_each(|x| { if let Some(x) = x { buffer.extend_from_slice(x); @@ -30,6 +34,8 @@ pub fn array_to_page( statistics: Option, ) -> PolarsResult { let is_optional = is_nullable(&type_.field_info); + let encode_options = EncodeNullability::new(is_optional); + let validity = array.validity(); let mut buffer = vec![]; @@ -43,7 +49,7 @@ pub fn array_to_page( let definition_levels_byte_length = buffer.len(); - encode_plain(array, is_optional, &mut buffer); + encode_plain(array, encode_options, &mut buffer); utils::build_plain_page( buffer, diff --git a/crates/polars-parquet/src/arrow/write/mod.rs b/crates/polars-parquet/src/arrow/write/mod.rs index 950e5fc16837..abdaab87bb3f 100644 --- a/crates/polars-parquet/src/arrow/write/mod.rs +++ b/crates/polars-parquet/src/arrow/write/mod.rs @@ -71,6 +71,13 @@ impl Default for StatisticsOptions { } } +/// Options to encode an array +#[derive(Clone, Copy)] +pub enum EncodeNullability { + Required, + Optional, +} + /// Currently supported options to write to parquet #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct WriteOptions { @@ -131,6 +138,20 @@ impl WriteOptions { } } +impl EncodeNullability { + const fn new(is_optional: bool) -> Self { + if is_optional { + Self::Optional + } else { + Self::Required + } + } + + fn is_optional(self) -> bool { + matches!(self, Self::Optional) + } +} + /// returns offset and length to slice the leaf values pub fn slice_nested_leaf(nested: &[Nested]) -> (usize, usize) { // find the deepest recursive dremel structure as that one determines how many values we must @@ -1003,6 +1024,7 @@ fn transverse_recursive T + Clone>( /// Transverses the `data_type` up to its (parquet) columns and returns a vector of /// items based on `map`. 
+/// /// This is used to assign an [`Encoding`] to every parquet column based on the columns' type (see example) pub fn transverse T + Clone>( data_type: &ArrowDataType, diff --git a/crates/polars-parquet/src/arrow/write/primitive/basic.rs b/crates/polars-parquet/src/arrow/write/primitive/basic.rs index 2c6c137ce220..d970e3659dcb 100644 --- a/crates/polars-parquet/src/arrow/write/primitive/basic.rs +++ b/crates/polars-parquet/src/arrow/write/primitive/basic.rs @@ -13,11 +13,11 @@ use crate::parquet::schema::types::PrimitiveType; use crate::parquet::statistics::PrimitiveStatistics; use crate::parquet::types::NativeType as ParquetNativeType; use crate::read::Page; -use crate::write::StatisticsOptions; +use crate::write::{EncodeNullability, StatisticsOptions}; pub(crate) fn encode_plain( array: &PrimitiveArray, - is_optional: bool, + options: EncodeNullability, mut buffer: Vec, ) -> Vec where @@ -25,6 +25,8 @@ where P: ParquetNativeType, T: num_traits::AsPrimitive
<P>
, { + let is_optional = options.is_optional(); + if is_optional { // append the non-null values let validity = array.validity(); @@ -33,10 +35,10 @@ where let null_count = validity.unset_bits(); if null_count > 0 { - let values = array.values().as_slice(); let mut iter = validity.iter(); + let values = array.values().as_slice(); - buffer.reserve(std::mem::size_of::
<P>
() * (array.len() - null_count)); + buffer.reserve(std::mem::size_of::() * (array.len() - null_count)); let mut offset = 0; let mut remaining_valid = array.len() - null_count; @@ -72,7 +74,7 @@ where pub(crate) fn encode_delta( array: &PrimitiveArray, - is_optional: bool, + options: EncodeNullability, mut buffer: Vec, ) -> Vec where @@ -81,6 +83,8 @@ where T: num_traits::AsPrimitive
<P>
, P: num_traits::AsPrimitive, { + let is_optional = options.is_optional(); + if is_optional { // append the non-null values let iterator = array.non_null_values_iter().map(|x| { @@ -135,7 +139,7 @@ where .map(Page::Data) } -pub fn array_to_page, bool, Vec) -> Vec>( +pub fn array_to_page, EncodeNullability, Vec) -> Vec>( array: &PrimitiveArray, options: WriteOptions, type_: PrimitiveType, @@ -149,6 +153,7 @@ where T: num_traits::AsPrimitive
<P>
, { let is_optional = is_nullable(&type_.field_info); + let encode_options = EncodeNullability::new(is_optional); let validity = array.validity(); @@ -163,7 +168,7 @@ where let definition_levels_byte_length = buffer.len(); - let buffer = encode(array, is_optional, buffer); + let buffer = encode(array, encode_options, buffer); let statistics = if options.has_statistics() { Some(build_statistics(array, type_.clone(), &options.statistics).serialize()) diff --git a/crates/polars-parquet/src/arrow/write/primitive/nested.rs b/crates/polars-parquet/src/arrow/write/primitive/nested.rs index 918afa6a4dc6..b5391263025e 100644 --- a/crates/polars-parquet/src/arrow/write/primitive/nested.rs +++ b/crates/polars-parquet/src/arrow/write/primitive/nested.rs @@ -10,6 +10,7 @@ use crate::parquet::encoding::Encoding; use crate::parquet::page::DataPage; use crate::parquet::schema::types::PrimitiveType; use crate::parquet::types::NativeType; +use crate::write::EncodeNullability; pub fn array_to_page( array: &PrimitiveArray, @@ -23,13 +24,14 @@ where T: num_traits::AsPrimitive, { let is_optional = is_nullable(&type_.field_info); + let encode_options = EncodeNullability::new(is_optional); let mut buffer = vec![]; let (repetition_levels_byte_length, definition_levels_byte_length) = nested::write_rep_and_def(options.version, nested, &mut buffer)?; - let buffer = encode_plain(array, is_optional, buffer); + let buffer = encode_plain(array, encode_options, buffer); let statistics = if options.has_statistics() { Some(build_statistics(array, type_.clone(), &options.statistics).serialize()) diff --git a/crates/polars-parquet/src/arrow/write/utils.rs b/crates/polars-parquet/src/arrow/write/utils.rs index 7f7796b0fff2..6e3efee54be5 100644 --- a/crates/polars-parquet/src/arrow/write/utils.rs +++ b/crates/polars-parquet/src/arrow/write/utils.rs @@ -92,7 +92,7 @@ pub fn build_plain_page( max_def_level: 0, max_rep_level: 0, }, - Some(num_rows), + num_rows, )) } diff --git a/crates/polars-parquet/src/parquet/compression.rs b/crates/polars-parquet/src/parquet/compression.rs index 7798af585b7b..41bfb5f557bf 100644 --- a/crates/polars-parquet/src/parquet/compression.rs +++ b/crates/polars-parquet/src/parquet/compression.rs @@ -26,6 +26,7 @@ fn inner_compress< /// Compresses data stored in slice `input_buf` and writes the compressed result /// to `output_buf`. +/// /// Note that you'll need to call `clear()` before reusing the same `output_buf` /// across different `compress` calls. #[allow(unused_variables)] diff --git a/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/decoder.rs b/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/decoder.rs index fb8eb153cfb7..261e84ce2e23 100644 --- a/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/decoder.rs +++ b/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/decoder.rs @@ -28,6 +28,7 @@ //! Note that all these additions need to be wrapping. 
use super::super::{bitpacked, uleb128, zigzag_leb128}; +use super::lin_natural_sum; use crate::parquet::encoding::bitpacked::{Unpackable, Unpacked}; use crate::parquet::error::{ParquetError, ParquetResult}; @@ -166,16 +167,11 @@ impl DeltaGatherer for SumGatherer { delta: i64, num_repeats: usize, ) -> ParquetResult<()> { - if v < 0 || (delta < 0 && num_repeats as i64 * delta + v < 0) { + if v < 0 || (delta < 0 && num_repeats > 0 && (num_repeats - 1) as i64 * delta + v < 0) { return Err(ParquetError::oos("Invalid delta encoding length")); } - let base = v * num_repeats as i64; - let is_even = num_repeats & 1; - // SUM_i=0^n f * i = f * (n(n+1)/2) - let increment = (num_repeats >> is_even) * ((num_repeats + 1) >> (is_even ^ 1)); - - *target += base as usize + increment; + *target += lin_natural_sum(v, delta, num_repeats) as usize; Ok(()) } @@ -254,6 +250,13 @@ fn gather_miniblock( ) -> ParquetResult<()> { let bitwidth = bitwidth as usize; + if bitwidth == 0 { + let v = last_value.wrapping_add(min_delta); + gatherer.gather_constant(target, v, min_delta, values_per_miniblock)?; + *last_value = last_value.wrapping_add(min_delta * values_per_miniblock as i64); + return Ok(()); + } + debug_assert!(bitwidth <= 64); debug_assert_eq!((bitwidth * values_per_miniblock).div_ceil(8), values.len()); @@ -286,18 +289,14 @@ fn gather_block<'a, G: DeltaGatherer>( let bitwidths; (bitwidths, values) = values .split_at_checked(num_miniblocks) - .ok_or(ParquetError::oos( - "Not enough bitwidths available in delta encoding", - ))?; + .ok_or_else(|| ParquetError::oos("Not enough bitwidths available in delta encoding"))?; gatherer.target_reserve(target, num_miniblocks * values_per_miniblock); for &bitwidth in bitwidths { let miniblock; (miniblock, values) = values .split_at_checked((bitwidth as usize * values_per_miniblock).div_ceil(8)) - .ok_or(ParquetError::oos( - "Not enough bytes for miniblock in delta encoding", - ))?; + .ok_or_else(|| ParquetError::oos("Not enough bytes for miniblock in delta encoding"))?; gather_miniblock( target, min_delta, @@ -372,9 +371,9 @@ impl<'a> Decoder<'a> { // let (_, consumed) = zigzag_leb128::decode(rem); - rem = rem.get(consumed..).ok_or(ParquetError::oos( - "No min-delta value in delta encoding miniblock", - ))?; + rem = rem.get(consumed..).ok_or_else(|| { + ParquetError::oos("No min-delta value in delta encoding miniblock") + })?; if rem.len() < num_miniblocks_per_block { return Err(ParquetError::oos( @@ -401,9 +400,11 @@ impl<'a> Decoder<'a> { rem = rem .get(num_miniblocks_per_block + num_bitpacking_bytes..) - .ok_or(ParquetError::oos( - "Not enough bytes for all bitpacked values in delta encoding", - ))?; + .ok_or_else(|| { + ParquetError::oos( + "Not enough bytes for all bitpacked values in delta encoding", + ) + })?; num_values_left = num_values_left.saturating_sub(values_per_block); } diff --git a/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/mod.rs b/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/mod.rs index 23e67ee7fb4f..4a32610a302e 100644 --- a/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/mod.rs +++ b/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/mod.rs @@ -5,11 +5,39 @@ mod fuzz; pub(crate) use decoder::{Decoder, DeltaGatherer, SumGatherer}; pub(crate) use encoder::encode; +/// The sum of `start, start + delta, start + 2 * delta, ... len times`. 
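For reference, the quantity this helper computes has a simple closed form: sum over i = 0..len-1 of (start + i * delta) equals start * len + delta * len * (len - 1) / 2. For example, start = 2, delta = 2, len = 3 gives 2 * 3 + 2 * 3 = 12, matching the tests that follow the implementation below.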
+pub(crate) fn lin_natural_sum(start: i64, delta: i64, len: usize) -> i64 { + debug_assert!(len < i64::MAX as usize); + + let base = start * len as i64; + let sum = if len == 0 { + 0 + } else { + let is_odd = len & 1; + // SUM_i=0^n f * i = f * (n(n+1)/2) + let sum = (len >> (is_odd ^ 1)) * (len.wrapping_sub(1) >> is_odd); + delta * sum as i64 + }; + + base + sum +} + #[cfg(test)] mod tests { use super::*; use crate::parquet::error::{ParquetError, ParquetResult}; + #[test] + fn linear_natural_sum() { + assert_eq!(lin_natural_sum(0, 0, 0), 0); + assert_eq!(lin_natural_sum(10, 4, 0), 0); + assert_eq!(lin_natural_sum(0, 1, 1), 0); + assert_eq!(lin_natural_sum(0, 1, 3), 3); + assert_eq!(lin_natural_sum(0, 1, 4), 6); + assert_eq!(lin_natural_sum(0, 2, 3), 6); + assert_eq!(lin_natural_sum(2, 2, 3), 12); + } + #[test] fn basic() -> Result<(), ParquetError> { let data = vec![1, 3, 1, 2, 3]; diff --git a/crates/polars-parquet/src/parquet/indexes/index.rs b/crates/polars-parquet/src/parquet/indexes/index.rs deleted file mode 100644 index ecf11fe7f30e..000000000000 --- a/crates/polars-parquet/src/parquet/indexes/index.rs +++ /dev/null @@ -1,322 +0,0 @@ -use std::any::Any; - -use parquet_format_safe::ColumnIndex; - -use crate::parquet::error::ParquetError; -use crate::parquet::parquet_bridge::BoundaryOrder; -use crate::parquet::schema::types::{PhysicalType, PrimitiveType}; -use crate::parquet::types::NativeType; - -/// Trait object representing a [`ColumnIndex`] in Rust's native format. -/// -/// See [`NativeIndex`], [`ByteIndex`] and [`FixedLenByteIndex`] for concrete implementations. -pub trait Index: Send + Sync + std::fmt::Debug { - fn as_any(&self) -> &dyn Any; - - fn physical_type(&self) -> &PhysicalType; -} - -impl PartialEq for dyn Index + '_ { - fn eq(&self, that: &dyn Index) -> bool { - equal(self, that) - } -} - -impl Eq for dyn Index + '_ {} - -fn equal(lhs: &dyn Index, rhs: &dyn Index) -> bool { - if lhs.physical_type() != rhs.physical_type() { - return false; - } - - match lhs.physical_type() { - PhysicalType::Boolean => { - lhs.as_any().downcast_ref::().unwrap() - == rhs.as_any().downcast_ref::().unwrap() - }, - PhysicalType::Int32 => { - lhs.as_any().downcast_ref::>().unwrap() - == rhs.as_any().downcast_ref::>().unwrap() - }, - PhysicalType::Int64 => { - lhs.as_any().downcast_ref::>().unwrap() - == rhs.as_any().downcast_ref::>().unwrap() - }, - PhysicalType::Int96 => { - lhs.as_any() - .downcast_ref::>() - .unwrap() - == rhs - .as_any() - .downcast_ref::>() - .unwrap() - }, - PhysicalType::Float => { - lhs.as_any().downcast_ref::>().unwrap() - == rhs.as_any().downcast_ref::>().unwrap() - }, - PhysicalType::Double => { - lhs.as_any().downcast_ref::>().unwrap() - == rhs.as_any().downcast_ref::>().unwrap() - }, - PhysicalType::ByteArray => { - lhs.as_any().downcast_ref::().unwrap() - == rhs.as_any().downcast_ref::().unwrap() - }, - PhysicalType::FixedLenByteArray(_) => { - lhs.as_any().downcast_ref::().unwrap() - == rhs.as_any().downcast_ref::().unwrap() - }, - } -} - -/// An index of a column of [`NativeType`] physical representation -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct NativeIndex { - /// The primitive type - pub primitive_type: PrimitiveType, - /// The indexes, one item per page - pub indexes: Vec>, - /// the order - pub boundary_order: BoundaryOrder, -} - -impl NativeIndex { - /// Creates a new [`NativeIndex`] - pub(crate) fn try_new( - index: ColumnIndex, - primitive_type: PrimitiveType, - ) -> Result { - let len = index.min_values.len(); - - let null_counts = 
index - .null_counts - .map(|x| x.into_iter().map(Some).collect::>()) - .unwrap_or_else(|| vec![None; len]); - - let indexes = index - .min_values - .iter() - .zip(index.max_values.into_iter()) - .zip(index.null_pages.into_iter()) - .zip(null_counts.into_iter()) - .map(|(((min, max), is_null), null_count)| { - let (min, max) = if is_null { - (None, None) - } else { - let min = min.as_slice().try_into()?; - let max = max.as_slice().try_into()?; - (Some(T::from_le_bytes(min)), Some(T::from_le_bytes(max))) - }; - Ok(PageIndex { - min, - max, - null_count, - }) - }) - .collect::, ParquetError>>()?; - - Ok(Self { - primitive_type, - indexes, - boundary_order: index.boundary_order.try_into()?, - }) - } -} - -/// The index of a page, containing the min and max values of the page. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct PageIndex { - /// The minimum value in the page. It is None when all values are null - pub min: Option, - /// The maximum value in the page. It is None when all values are null - pub max: Option, - /// The number of null values in the page - pub null_count: Option, -} - -impl Index for NativeIndex { - fn as_any(&self) -> &dyn Any { - self - } - - fn physical_type(&self) -> &PhysicalType { - &T::TYPE - } -} - -/// An index of a column of bytes physical type -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct ByteIndex { - /// The [`PrimitiveType`]. - pub primitive_type: PrimitiveType, - /// The indexes, one item per page - pub indexes: Vec>>, - pub boundary_order: BoundaryOrder, -} - -impl ByteIndex { - pub(crate) fn try_new( - index: ColumnIndex, - primitive_type: PrimitiveType, - ) -> Result { - let len = index.min_values.len(); - - let null_counts = index - .null_counts - .map(|x| x.into_iter().map(Some).collect::>()) - .unwrap_or_else(|| vec![None; len]); - - let indexes = index - .min_values - .into_iter() - .zip(index.max_values.into_iter()) - .zip(index.null_pages.into_iter()) - .zip(null_counts.into_iter()) - .map(|(((min, max), is_null), null_count)| { - let (min, max) = if is_null { - (None, None) - } else { - (Some(min), Some(max)) - }; - Ok(PageIndex { - min, - max, - null_count, - }) - }) - .collect::, ParquetError>>()?; - - Ok(Self { - primitive_type, - indexes, - boundary_order: index.boundary_order.try_into()?, - }) - } -} - -impl Index for ByteIndex { - fn as_any(&self) -> &dyn Any { - self - } - - fn physical_type(&self) -> &PhysicalType { - &PhysicalType::ByteArray - } -} - -/// An index of a column of fixed len byte physical type -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct FixedLenByteIndex { - /// The [`PrimitiveType`]. 
- pub primitive_type: PrimitiveType, - /// The indexes, one item per page - pub indexes: Vec>>, - pub boundary_order: BoundaryOrder, -} - -impl FixedLenByteIndex { - pub(crate) fn try_new( - index: ColumnIndex, - primitive_type: PrimitiveType, - ) -> Result { - let len = index.min_values.len(); - - let null_counts = index - .null_counts - .map(|x| x.into_iter().map(Some).collect::>()) - .unwrap_or_else(|| vec![None; len]); - - let indexes = index - .min_values - .into_iter() - .zip(index.max_values.into_iter()) - .zip(index.null_pages.into_iter()) - .zip(null_counts.into_iter()) - .map(|(((min, max), is_null), null_count)| { - let (min, max) = if is_null { - (None, None) - } else { - (Some(min), Some(max)) - }; - Ok(PageIndex { - min, - max, - null_count, - }) - }) - .collect::, ParquetError>>()?; - - Ok(Self { - primitive_type, - indexes, - boundary_order: index.boundary_order.try_into()?, - }) - } -} - -impl Index for FixedLenByteIndex { - fn as_any(&self) -> &dyn Any { - self - } - - fn physical_type(&self) -> &PhysicalType { - &self.primitive_type.physical_type - } -} - -/// An index of a column of boolean physical type -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct BooleanIndex { - /// The indexes, one item per page - pub indexes: Vec>, - pub boundary_order: BoundaryOrder, -} - -impl BooleanIndex { - pub(crate) fn try_new(index: ColumnIndex) -> Result { - let len = index.min_values.len(); - - let null_counts = index - .null_counts - .map(|x| x.into_iter().map(Some).collect::>()) - .unwrap_or_else(|| vec![None; len]); - - let indexes = index - .min_values - .into_iter() - .zip(index.max_values.into_iter()) - .zip(index.null_pages.into_iter()) - .zip(null_counts.into_iter()) - .map(|(((min, max), is_null), null_count)| { - let (min, max) = if is_null { - (None, None) - } else { - let min = min[0] == 1; - let max = max[0] == 1; - (Some(min), Some(max)) - }; - Ok(PageIndex { - min, - max, - null_count, - }) - }) - .collect::, ParquetError>>()?; - - Ok(Self { - indexes, - boundary_order: index.boundary_order.try_into()?, - }) - } -} - -impl Index for BooleanIndex { - fn as_any(&self) -> &dyn Any { - self - } - - fn physical_type(&self) -> &PhysicalType { - &PhysicalType::Boolean - } -} diff --git a/crates/polars-parquet/src/parquet/indexes/intervals.rs b/crates/polars-parquet/src/parquet/indexes/intervals.rs deleted file mode 100644 index d04d3104a618..000000000000 --- a/crates/polars-parquet/src/parquet/indexes/intervals.rs +++ /dev/null @@ -1,139 +0,0 @@ -use parquet_format_safe::PageLocation; -#[cfg(feature = "serde_types")] -use serde::{Deserialize, Serialize}; - -use crate::parquet::error::ParquetError; - -/// An interval -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -#[cfg_attr(feature = "serde_types", derive(Deserialize, Serialize))] -pub struct Interval { - /// Its start - pub start: usize, - /// Its length - pub length: usize, -} - -impl Interval { - /// Create a new interval - pub fn new(start: usize, length: usize) -> Self { - Self { start, length } - } -} - -/// Returns the set of (row) intervals of the pages. -/// # Errors -/// This function errors if the locations are not castable to `usize` or such that -/// their ranges of row are larger than `num_rows`. 
-pub fn compute_page_row_intervals( - locations: &[PageLocation], - num_rows: usize, -) -> Result, ParquetError> { - if locations.is_empty() { - return Ok(vec![]); - }; - - let last = (|| { - let start: usize = locations.last().unwrap().first_row_index.try_into()?; - let length = num_rows.checked_sub(start).ok_or_else(|| { - ParquetError::oos("Page start cannot be smaller than the number of rows") - })?; - Result::<_, ParquetError>::Ok(Interval::new(start, length)) - })(); - - let pages_lengths = locations - .windows(2) - .map(|x| { - let start = x[0].first_row_index.try_into()?; - - let length = x[1] - .first_row_index - .checked_sub(x[0].first_row_index) - .ok_or_else(|| { - ParquetError::oos("Page start cannot be smaller than the number of rows") - })? - .try_into()?; - - Ok(Interval::new(start, length)) - }) - .chain(std::iter::once(last)); - pages_lengths.collect() -} - -/// Returns the set of intervals `(start, len)` containing all the -/// selected rows (for a given column) -pub fn compute_rows( - selected: &[bool], - locations: &[PageLocation], - num_rows: usize, -) -> Result, ParquetError> { - let page_intervals = compute_page_row_intervals(locations, num_rows)?; - - Ok(selected - .iter() - .zip(page_intervals.iter().copied()) - .filter_map( - |(&is_selected, page)| { - if is_selected { - Some(page) - } else { - None - } - }, - ) - .collect()) -} - -/// An enum describing a page that was either selected in a filter pushdown or skipped -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -#[cfg_attr(feature = "serde_types", derive(Deserialize, Serialize))] -pub struct FilteredPage { - /// Location of the page in the file - pub start: u64, - pub length: usize, - /// rows to select from the page - pub selected_rows: Vec, - pub num_rows: usize, -} - -fn is_in(probe: Interval, intervals: &[Interval]) -> Vec { - intervals - .iter() - .filter_map(|interval| { - let interval_end = interval.start + interval.length; - let probe_end = probe.start + probe.length; - let overlaps = (probe.start < interval_end) && (probe_end > interval.start); - if overlaps { - let start = interval.start.max(probe.start); - let end = interval_end.min(probe_end); - Some(Interval::new(start - probe.start, end - start)) - } else { - None - } - }) - .collect() -} - -/// Given a set of selected [Interval]s of rows and the set of [`PageLocation`], returns the -/// a set of [`FilteredPage`] with the same number of items as `locations`. 
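As a concrete reading of the overlap computation in `is_in` above: if a page covers rows 5..15 and the selected interval is rows 5..10, the page is kept with `selected_rows` of `Interval::new(0, 5)` (expressed relative to the page's first row), while a page with no overlap is kept with an empty `selected_rows`. The deleted tests below exercise exactly these cases.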
-pub fn select_pages( - intervals: &[Interval], - locations: &[PageLocation], - num_rows: usize, -) -> Result, ParquetError> { - let page_intervals = compute_page_row_intervals(locations, num_rows)?; - - page_intervals - .into_iter() - .zip(locations.iter()) - .map(|(interval, location)| { - let selected_rows = is_in(interval, intervals); - Ok(FilteredPage { - start: location.offset.try_into()?, - length: location.compressed_page_size.try_into()?, - selected_rows, - num_rows: interval.length, - }) - }) - .collect() -} diff --git a/crates/polars-parquet/src/parquet/indexes/mod.rs b/crates/polars-parquet/src/parquet/indexes/mod.rs deleted file mode 100644 index f652f8bb4be3..000000000000 --- a/crates/polars-parquet/src/parquet/indexes/mod.rs +++ /dev/null @@ -1,234 +0,0 @@ -mod index; -mod intervals; - -pub use intervals::{compute_rows, select_pages, FilteredPage, Interval}; - -pub use self::index::{BooleanIndex, ByteIndex, FixedLenByteIndex, Index, NativeIndex, PageIndex}; -pub use crate::parquet::parquet_bridge::BoundaryOrder; -pub use crate::parquet::thrift_format::PageLocation; - -#[cfg(test)] -mod tests { - use super::*; - use crate::parquet::schema::types::{PhysicalType, PrimitiveType}; - - #[test] - fn test_basic() { - let locations = &[PageLocation { - offset: 100, - compressed_page_size: 10, - first_row_index: 0, - }]; - let num_rows = 10; - - let row_intervals = compute_rows(&[true; 1], locations, num_rows).unwrap(); - assert_eq!(row_intervals, vec![Interval::new(0, 10)]) - } - - #[test] - fn test_multiple() { - // two pages - let index = ByteIndex { - primitive_type: PrimitiveType::from_physical("c1".to_string(), PhysicalType::ByteArray), - indexes: vec![ - PageIndex { - min: Some(vec![0]), - max: Some(vec![8, 9]), - null_count: Some(0), - }, - PageIndex { - min: Some(vec![20]), - max: Some(vec![98, 99]), - null_count: Some(0), - }, - ], - boundary_order: Default::default(), - }; - let locations = &[ - PageLocation { - offset: 100, - compressed_page_size: 10, - first_row_index: 0, - }, - PageLocation { - offset: 110, - compressed_page_size: 20, - first_row_index: 5, - }, - ]; - let num_rows = 10; - - // filter of the form `x > "a"` - let selector = |page: &PageIndex>| { - page.max - .as_ref() - .map(|x| x.as_slice()[0] > 97) - .unwrap_or(false) // no max is present => all nulls => not selected - }; - let selected = index.indexes.iter().map(selector).collect::>(); - - let rows = compute_rows(&selected, locations, num_rows).unwrap(); - assert_eq!(rows, vec![Interval::new(5, 5)]); - - let pages = select_pages(&rows, locations, num_rows).unwrap(); - - assert_eq!( - pages, - vec![ - FilteredPage { - start: 100, - length: 10, - selected_rows: vec![], - num_rows: 5 - }, - FilteredPage { - start: 110, - length: 20, - selected_rows: vec![Interval::new(0, 5)], - num_rows: 5 - } - ] - ); - } - - #[test] - fn test_other_column() { - let locations = &[ - PageLocation { - offset: 100, - compressed_page_size: 20, - first_row_index: 0, - }, - PageLocation { - offset: 120, - compressed_page_size: 20, - first_row_index: 10, - }, - ]; - let num_rows = 100; - - let intervals = &[Interval::new(5, 5)]; - - let pages = select_pages(intervals, locations, num_rows).unwrap(); - - assert_eq!( - pages, - vec![ - FilteredPage { - start: 100, - length: 20, - selected_rows: vec![Interval::new(5, 5)], - num_rows: 10, - }, - FilteredPage { - start: 120, - length: 20, - selected_rows: vec![], - num_rows: 90 - }, - ] - ); - } - - #[test] - fn test_other_interval_in_middle() { - let locations = &[ - PageLocation { - 
offset: 100, - compressed_page_size: 20, - first_row_index: 0, - }, - PageLocation { - offset: 120, - compressed_page_size: 20, - first_row_index: 10, - }, - PageLocation { - offset: 140, - compressed_page_size: 20, - first_row_index: 100, - }, - ]; - let num_rows = 200; - - // interval partially intersects 2 pages (0 and 1) - let intervals = &[Interval::new(5, 6)]; - - let pages = select_pages(intervals, locations, num_rows).unwrap(); - - assert_eq!( - pages, - vec![ - FilteredPage { - start: 100, - length: 20, - selected_rows: vec![Interval::new(5, 5)], - num_rows: 10, - }, - FilteredPage { - start: 120, - length: 20, - selected_rows: vec![Interval::new(0, 1)], - num_rows: 90, - }, - FilteredPage { - start: 140, - length: 20, - selected_rows: vec![], - num_rows: 100 - }, - ] - ); - } - - #[test] - fn test_other_column2() { - let locations = &[ - PageLocation { - offset: 100, - compressed_page_size: 20, - first_row_index: 0, - }, - PageLocation { - offset: 120, - compressed_page_size: 20, - first_row_index: 10, - }, - PageLocation { - offset: 140, - compressed_page_size: 20, - first_row_index: 100, - }, - ]; - let num_rows = 200; - - // interval partially intersects 1 page (0) - let intervals = &[Interval::new(0, 1)]; - - let pages = select_pages(intervals, locations, num_rows).unwrap(); - - assert_eq!( - pages, - vec![ - FilteredPage { - start: 100, - length: 20, - selected_rows: vec![Interval::new(0, 1)], - num_rows: 10, - }, - FilteredPage { - start: 120, - length: 20, - selected_rows: vec![], - num_rows: 90 - }, - FilteredPage { - start: 140, - length: 20, - selected_rows: vec![], - num_rows: 100 - }, - ] - ); - } -} diff --git a/crates/polars-parquet/src/parquet/mod.rs b/crates/polars-parquet/src/parquet/mod.rs index f40b21ea0e04..ea6b5b2c8357 100644 --- a/crates/polars-parquet/src/parquet/mod.rs +++ b/crates/polars-parquet/src/parquet/mod.rs @@ -4,7 +4,6 @@ pub mod error; pub mod bloom_filter; pub mod compression; pub mod encoding; -pub mod indexes; pub mod metadata; pub mod page; mod parquet_bridge; diff --git a/crates/polars-parquet/src/parquet/page/mod.rs b/crates/polars-parquet/src/parquet/page/mod.rs index 62b3aa20163b..128f1af03c14 100644 --- a/crates/polars-parquet/src/parquet/page/mod.rs +++ b/crates/polars-parquet/src/parquet/page/mod.rs @@ -2,7 +2,6 @@ use super::CowBuffer; use crate::parquet::compression::Compression; use crate::parquet::encoding::{get_length, Encoding}; use crate::parquet::error::{ParquetError, ParquetResult}; -use crate::parquet::indexes::Interval; use crate::parquet::metadata::Descriptor; pub use crate::parquet::parquet_bridge::{DataPageHeaderExt, PageType}; use crate::parquet::statistics::Statistics; @@ -24,9 +23,7 @@ pub struct CompressedDataPage { pub(crate) compression: Compression, uncompressed_page_size: usize, pub(crate) descriptor: Descriptor, - - // The offset and length in rows - pub(crate) selected_rows: Option>, + pub num_rows: Option, } impl CompressedDataPage { @@ -37,16 +34,16 @@ impl CompressedDataPage { compression: Compression, uncompressed_page_size: usize, descriptor: Descriptor, - rows: Option, + num_rows: usize, ) -> Self { - Self::new_read( + Self { header, buffer, compression, uncompressed_page_size, descriptor, - rows.map(|x| vec![Interval::new(0, x)]), - ) + num_rows: Some(num_rows), + } } /// Returns a new [`CompressedDataPage`]. 
@@ -56,7 +53,6 @@ impl CompressedDataPage { compression: Compression, uncompressed_page_size: usize, descriptor: Descriptor, - selected_rows: Option>, ) -> Self { Self { header, @@ -64,7 +60,7 @@ impl CompressedDataPage { compression, uncompressed_page_size, descriptor, - selected_rows, + num_rows: None, } } @@ -87,16 +83,14 @@ impl CompressedDataPage { self.compression } - /// the rows to be selected by this page. - /// When `None`, all rows are to be considered. - pub fn selected_rows(&self) -> Option<&[Interval]> { - self.selected_rows.as_deref() - } - pub fn num_values(&self) -> usize { self.header.num_values() } + pub fn num_rows(&self) -> Option { + self.num_rows + } + /// Decodes the raw statistics into a statistics pub fn statistics(&self) -> Option> { match &self.header { @@ -111,11 +105,6 @@ impl CompressedDataPage { } } - #[inline] - pub fn select_rows(&mut self, selected_rows: Vec) { - self.selected_rows = Some(selected_rows); - } - pub fn slice_mut(&mut self) -> &mut CowBuffer { &mut self.buffer } @@ -143,7 +132,7 @@ pub struct DataPage { pub(super) header: DataPageHeader, pub(super) buffer: CowBuffer, pub descriptor: Descriptor, - pub selected_rows: Option>, + pub num_rows: Option, } impl DataPage { @@ -151,27 +140,26 @@ impl DataPage { header: DataPageHeader, buffer: CowBuffer, descriptor: Descriptor, - rows: Option, + num_rows: usize, ) -> Self { - Self::new_read( + Self { header, buffer, descriptor, - rows.map(|x| vec![Interval::new(0, x)]), - ) + num_rows: Some(num_rows), + } } pub(crate) fn new_read( header: DataPageHeader, buffer: CowBuffer, descriptor: Descriptor, - selected_rows: Option>, ) -> Self { Self { header, buffer, descriptor, - selected_rows, + num_rows: None, } } @@ -183,12 +171,6 @@ impl DataPage { &self.buffer } - /// the rows to be selected by this page. - /// When `None`, all rows are to be considered. - pub fn selected_rows(&self) -> Option<&[Interval]> { - self.selected_rows.as_deref() - } - /// Returns a mutable reference to the internal buffer. /// Useful to recover the buffer after the page has been decoded. pub fn buffer_mut(&mut self) -> &mut Vec { @@ -199,6 +181,10 @@ impl DataPage { self.header.num_values() } + pub fn num_rows(&self) -> Option { + self.num_rows + } + pub fn encoding(&self) -> Encoding { match &self.header { DataPageHeader::V1(d) => d.encoding(), @@ -300,10 +286,10 @@ impl CompressedPage { } } - pub(crate) fn selected_rows(&self) -> Option<&[Interval]> { + pub(crate) fn num_rows(&self) -> Option { match self { - CompressedPage::Data(page) => page.selected_rows(), - CompressedPage::Dict(_) => None, + CompressedPage::Data(page) => page.num_rows(), + CompressedPage::Dict(_) => Some(0), } } diff --git a/crates/polars-parquet/src/parquet/read/column/mod.rs b/crates/polars-parquet/src/parquet/read/column/mod.rs index 1a1277637f27..d6bcda08fe2d 100644 --- a/crates/polars-parquet/src/parquet/read/column/mod.rs +++ b/crates/polars-parquet/src/parquet/read/column/mod.rs @@ -1,4 +1,3 @@ -use std::io::{Read, Seek}; use std::vec::IntoIter; use super::{get_field_columns, get_page_iterator, MemReader, PageReader}; @@ -7,10 +6,6 @@ use crate::parquet::metadata::{ColumnChunkMetaData, RowGroupMetaData}; use crate::parquet::page::CompressedPage; use crate::parquet::schema::types::ParquetType; -#[cfg(feature = "async")] -#[cfg_attr(docsrs, doc(cfg(feature = "async")))] -mod stream; - /// Returns a [`ColumnIterator`] of column chunks corresponding to `field`. 
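// --- Illustrative sketch, not part of the diff above ---
// The page/mod.rs hunks replace the per-page `selected_rows: Option<Vec<Interval>>`
// with a plain `num_rows: Option<usize>`. A minimal, self-contained stand-in for that
// accounting, assuming (as the `CompressedPage::Dict(_) => Some(0)` arm above and the
// later row_group.rs hunk suggest) that dictionary pages contribute zero rows and that
// every written data page knows its row count; types here are simplified stand-ins,
// not the real polars-parquet structs.
enum Page {
    Data { num_rows: usize },
    Dict,
}

impl Page {
    fn num_rows(&self) -> Option<usize> {
        match self {
            Page::Data { num_rows } => Some(*num_rows),
            // Mirrors `CompressedPage::Dict(_) => Some(0)` in the hunk above.
            Page::Dict => Some(0),
        }
    }
}

// A writer can now total row-group rows by summing page counts instead of
// inspecting the last selected interval of each page.
fn row_group_rows(pages: &[Page]) -> usize {
    pages.iter().filter_map(Page::num_rows).sum()
}

fn main() {
    let pages = [Page::Dict, Page::Data { num_rows: 100 }, Page::Data { num_rows: 24 }];
    assert_eq!(row_group_rows(&pages), 124);
}
// --- end sketch ---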
/// /// Contrarily to [`get_page_iterator`] that returns a single iterator of pages, this iterator @@ -149,38 +144,3 @@ impl MutStreamingIterator for ReadColumnIterator { self.current.as_mut() } } - -/// Reads all columns that are part of the parquet field `field_name` -/// # Implementation -/// This operation is IO-bounded `O(C)` where C is the number of columns associated to -/// the field (one for non-nested types) -/// It reads the columns sequentially. Use [`read_column`] to fork this operation to multiple -/// readers. -pub fn read_columns<'a, R: Read + Seek>( - reader: &mut R, - columns: &'a [ColumnChunkMetaData], - field_name: &'a str, -) -> Result)>, ParquetError> { - get_field_columns(columns, field_name) - .map(|column| read_column(reader, column).map(|c| (column, c))) - .collect() -} - -/// Reads a column chunk into memory -/// This operation is IO-bounded and allocates the column's `compressed_size`. -pub fn read_column(reader: &mut R, column: &ColumnChunkMetaData) -> Result, ParquetError> -where - R: Read + Seek, -{ - let (start, length) = column.byte_range(); - reader.seek(std::io::SeekFrom::Start(start))?; - - let mut chunk = vec![]; - chunk.try_reserve(length as usize)?; - reader.by_ref().take(length).read_to_end(&mut chunk)?; - Ok(chunk) -} - -#[cfg(feature = "async")] -#[cfg_attr(docsrs, doc(cfg(feature = "async")))] -pub use stream::{read_column_async, read_columns_async}; diff --git a/crates/polars-parquet/src/parquet/read/column/stream.rs b/crates/polars-parquet/src/parquet/read/column/stream.rs deleted file mode 100644 index 63319d2260c6..000000000000 --- a/crates/polars-parquet/src/parquet/read/column/stream.rs +++ /dev/null @@ -1,51 +0,0 @@ -use futures::future::{try_join_all, BoxFuture}; -use futures::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt}; - -use crate::parquet::error::ParquetError; -use crate::parquet::metadata::ColumnChunkMetaData; -use crate::parquet::read::get_field_columns; - -/// Reads a single column chunk into memory asynchronously -pub async fn read_column_async<'b, R, F>( - factory: F, - meta: &ColumnChunkMetaData, -) -> Result, ParquetError> -where - R: AsyncRead + AsyncSeek + Send + Unpin, - F: Fn() -> BoxFuture<'b, std::io::Result>, -{ - let mut reader = factory().await?; - let (start, length) = meta.byte_range(); - reader.seek(std::io::SeekFrom::Start(start)).await?; - - let mut chunk = vec![]; - chunk.try_reserve(length as usize)?; - reader.take(length).read_to_end(&mut chunk).await?; - Result::Ok(chunk) -} - -/// Reads all columns that are part of the parquet field `field_name` -/// # Implementation -/// This operation is IO-bounded `O(C)` where C is the number of columns associated to -/// the field (one for non-nested types) -/// -/// It does so asynchronously via a single `join_all` over all the necessary columns for -/// `field_name`. 
-pub async fn read_columns_async< - 'a, - 'b, - R: AsyncRead + AsyncSeek + Send + Unpin, - F: Fn() -> BoxFuture<'b, std::io::Result> + Clone, ->( - factory: F, - columns: &'a [ColumnChunkMetaData], - field_name: &'a str, -) -> Result)>, ParquetError> { - let fields = get_field_columns(columns, field_name).collect::>(); - let futures = fields - .iter() - .map(|meta| async { read_column_async(factory.clone(), meta).await }); - - let columns = try_join_all(futures).await?; - Ok(fields.into_iter().zip(columns).collect()) -} diff --git a/crates/polars-parquet/src/parquet/read/compression.rs b/crates/polars-parquet/src/parquet/read/compression.rs index 0996093b31f0..a3d2db312ada 100644 --- a/crates/polars-parquet/src/parquet/read/compression.rs +++ b/crates/polars-parquet/src/parquet/read/compression.rs @@ -3,7 +3,9 @@ use parquet_format_safe::DataPageHeaderV2; use super::PageReader; use crate::parquet::compression::{self, Compression}; use crate::parquet::error::{ParquetError, ParquetResult}; -use crate::parquet::page::{CompressedPage, DataPage, DataPageHeader, DictPage, Page}; +use crate::parquet::page::{ + CompressedDataPage, CompressedPage, DataPage, DataPageHeader, DictPage, Page, +}; use crate::parquet::CowBuffer; fn decompress_v1( @@ -103,7 +105,6 @@ fn create_page(compressed_page: CompressedPage, buffer: Vec) -> Page { page.header, CowBuffer::Owned(buffer), page.descriptor, - page.selected_rows, )), CompressedPage::Dict(page) => Page::Dict(DictPage { buffer: CowBuffer::Owned(buffer), @@ -205,8 +206,27 @@ impl BasicDecompressor { } } +pub struct DataPageItem { + page: CompressedDataPage, +} + +impl DataPageItem { + pub fn num_values(&self) -> usize { + self.page.num_values() + } + + pub fn decompress(self, decompressor: &mut BasicDecompressor) -> ParquetResult { + let p = decompress(CompressedPage::Data(self.page), &mut decompressor.buffer)?; + let Page::Data(p) = p else { + panic!("Decompressing a data page should result in a data page"); + }; + + Ok(p) + } +} + impl Iterator for BasicDecompressor { - type Item = ParquetResult; + type Item = ParquetResult; fn next(&mut self) -> Option { let page = match self.reader.next() { @@ -215,15 +235,13 @@ impl Iterator for BasicDecompressor { Some(Ok(p)) => p, }; - Some(decompress(page, &mut self.buffer).and_then(|p| { - let Page::Data(p) = p else { - return Err(ParquetError::oos( - "Found dictionary page beyond the first page of a column chunk", - )); - }; + let CompressedPage::Data(page) = page else { + return Some(Err(ParquetError::oos( + "Found dictionary page beyond the first page of a column chunk", + ))); + }; - Ok(p) - })) + Some(Ok(DataPageItem { page })) } fn size_hint(&self) -> (usize, Option) { diff --git a/crates/polars-parquet/src/parquet/read/indexes/deserialize.rs b/crates/polars-parquet/src/parquet/read/indexes/deserialize.rs deleted file mode 100644 index d6bfb4de8a06..000000000000 --- a/crates/polars-parquet/src/parquet/read/indexes/deserialize.rs +++ /dev/null @@ -1,30 +0,0 @@ -use parquet_format_safe::thrift::protocol::TCompactInputProtocol; -use parquet_format_safe::ColumnIndex; - -use crate::parquet::error::ParquetError; -use crate::parquet::indexes::{BooleanIndex, ByteIndex, FixedLenByteIndex, Index, NativeIndex}; -use crate::parquet::schema::types::{PhysicalType, PrimitiveType}; - -pub fn deserialize( - data: &[u8], - primitive_type: PrimitiveType, -) -> Result, ParquetError> { - let mut prot = TCompactInputProtocol::new(data, data.len() * 2 + 1024); - - let index = ColumnIndex::read_from_in_protocol(&mut prot)?; - - let 
index = match primitive_type.physical_type { - PhysicalType::Boolean => Box::new(BooleanIndex::try_new(index)?) as Box, - PhysicalType::Int32 => Box::new(NativeIndex::::try_new(index, primitive_type)?), - PhysicalType::Int64 => Box::new(NativeIndex::::try_new(index, primitive_type)?), - PhysicalType::Int96 => Box::new(NativeIndex::<[u32; 3]>::try_new(index, primitive_type)?), - PhysicalType::Float => Box::new(NativeIndex::::try_new(index, primitive_type)?), - PhysicalType::Double => Box::new(NativeIndex::::try_new(index, primitive_type)?), - PhysicalType::ByteArray => Box::new(ByteIndex::try_new(index, primitive_type)?), - PhysicalType::FixedLenByteArray(_) => { - Box::new(FixedLenByteIndex::try_new(index, primitive_type)?) - }, - }; - - Ok(index) -} diff --git a/crates/polars-parquet/src/parquet/read/indexes/mod.rs b/crates/polars-parquet/src/parquet/read/indexes/mod.rs deleted file mode 100644 index 1e1919c84c75..000000000000 --- a/crates/polars-parquet/src/parquet/read/indexes/mod.rs +++ /dev/null @@ -1,4 +0,0 @@ -mod deserialize; -mod read; - -pub use read::*; diff --git a/crates/polars-parquet/src/parquet/read/indexes/read.rs b/crates/polars-parquet/src/parquet/read/indexes/read.rs deleted file mode 100644 index 1dbb5aa20fde..000000000000 --- a/crates/polars-parquet/src/parquet/read/indexes/read.rs +++ /dev/null @@ -1,134 +0,0 @@ -use std::io::{Cursor, Read, Seek, SeekFrom}; - -use parquet_format_safe::thrift::protocol::TCompactInputProtocol; -use parquet_format_safe::{ColumnChunk, OffsetIndex, PageLocation}; - -use super::deserialize::deserialize; -use crate::parquet::error::ParquetError; -use crate::parquet::indexes::Index; -use crate::parquet::metadata::ColumnChunkMetaData; - -fn prepare_read Option, G: Fn(&ColumnChunk) -> Option>( - chunks: &[ColumnChunkMetaData], - get_offset: F, - get_length: G, -) -> Result<(u64, Vec), ParquetError> { - // c1: [start, length] - // ... - // cN: [start, length] - - let first_chunk = if let Some(chunk) = chunks.first() { - chunk - } else { - return Ok((0, vec![])); - }; - let metadata = first_chunk.column_chunk(); - - let offset: u64 = if let Some(offset) = get_offset(metadata) { - offset.try_into()? - } else { - return Ok((0, vec![])); - }; - - let lengths = chunks - .iter() - .map(|x| get_length(x.column_chunk())) - .map(|maybe_length| { - let index_length = maybe_length.ok_or_else(|| { - ParquetError::oos("The column length must exist if column offset exists") - })?; - - Ok(index_length.try_into()?) - }) - .collect::, ParquetError>>()?; - - Ok((offset, lengths)) -} - -fn prepare_column_index_read( - chunks: &[ColumnChunkMetaData], -) -> Result<(u64, Vec), ParquetError> { - prepare_read(chunks, |x| x.column_index_offset, |x| x.column_index_length) -} - -fn prepare_offset_index_read( - chunks: &[ColumnChunkMetaData], -) -> Result<(u64, Vec), ParquetError> { - prepare_read(chunks, |x| x.offset_index_offset, |x| x.offset_index_length) -} - -fn deserialize_column_indexes( - chunks: &[ColumnChunkMetaData], - data: &[u8], - lengths: Vec, -) -> Result>, ParquetError> { - let mut start = 0; - let data = lengths.into_iter().map(|length| { - let r = &data[start..start + length]; - start += length; - r - }); - - chunks - .iter() - .zip(data) - .map(|(chunk, data)| { - let primitive_type = chunk.descriptor().descriptor.primitive_type.clone(); - deserialize(data, primitive_type) - }) - .collect() -} - -/// Reads the column indexes of all [`ColumnChunkMetaData`] and deserializes them into [`Index`]. 
-/// Returns an empty vector if indexes are not available -pub fn read_columns_indexes( - reader: &mut R, - chunks: &[ColumnChunkMetaData], -) -> Result>, ParquetError> { - let (offset, lengths) = prepare_column_index_read(chunks)?; - - let length = lengths.iter().sum::(); - - reader.seek(SeekFrom::Start(offset))?; - - let mut data = vec![]; - data.try_reserve(length)?; - reader.by_ref().take(length as u64).read_to_end(&mut data)?; - - deserialize_column_indexes(chunks, &data, lengths) -} - -fn deserialize_page_locations( - data: &[u8], - column_number: usize, -) -> Result>, ParquetError> { - let len = data.len() * 2 + 1024; - let mut reader = Cursor::new(data); - - (0..column_number) - .map(|_| { - let mut prot = TCompactInputProtocol::new(&mut reader, len); - let offset = OffsetIndex::read_from_in_protocol(&mut prot)?; - Ok(offset.page_locations) - }) - .collect() -} - -/// Read [`PageLocation`]s from the [`ColumnChunkMetaData`]s. -/// Returns an empty vector if indexes are not available -pub fn read_pages_locations( - reader: &mut R, - chunks: &[ColumnChunkMetaData], -) -> Result>, ParquetError> { - let (offset, lengths) = prepare_offset_index_read(chunks)?; - - let length = lengths.iter().sum::(); - - reader.seek(SeekFrom::Start(offset))?; - - let mut data = vec![]; - data.try_reserve(length)?; - reader.by_ref().take(length as u64).read_to_end(&mut data)?; - - deserialize_page_locations(&data, chunks.len()) -} diff --git a/crates/polars-parquet/src/parquet/read/mod.rs b/crates/polars-parquet/src/parquet/read/mod.rs index e3426a38dc3c..ffd1534f928c 100644 --- a/crates/polars-parquet/src/parquet/read/mod.rs +++ b/crates/polars-parquet/src/parquet/read/mod.rs @@ -1,6 +1,5 @@ mod column; mod compression; -mod indexes; pub mod levels; mod metadata; mod page; @@ -11,7 +10,6 @@ use std::io::{Seek, SeekFrom}; pub use column::*; pub use compression::{decompress, BasicDecompressor}; -pub use indexes::{read_columns_indexes, read_pages_locations}; pub use metadata::{deserialize_metadata, read_metadata, read_metadata_with_size}; #[cfg(feature = "async")] pub use page::{get_page_stream, get_page_stream_from_column_start}; diff --git a/crates/polars-parquet/src/parquet/read/page/reader.rs b/crates/polars-parquet/src/parquet/read/page/reader.rs index dcc94d51dec3..f01cf55c4a8e 100644 --- a/crates/polars-parquet/src/parquet/read/page/reader.rs +++ b/crates/polars-parquet/src/parquet/read/page/reader.rs @@ -7,7 +7,6 @@ use polars_utils::mmap::{MemReader, MemSlice}; use super::PageIterator; use crate::parquet::compression::Compression; use crate::parquet::error::{ParquetError, ParquetResult}; -use crate::parquet::indexes::Interval; use crate::parquet::metadata::{ColumnChunkMetaData, Descriptor}; use crate::parquet::page::{ CompressedDataPage, CompressedDictPage, CompressedPage, DataPageHeader, PageType, @@ -58,6 +57,7 @@ impl From<&ColumnChunkMetaData> for PageMetaData { /// A fallible [`Iterator`] of [`CompressedDataPage`]. This iterator reads pages back /// to back until all pages have been consumed. +/// /// The pages from this iterator always have [`None`] [`crate::parquet::page::CompressedDataPage::selected_rows()`] since /// filter pushdown is not supported without a /// pre-computed [page index](https://github.com/apache/parquet-format/blob/master/PageIndex.md). 
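// --- Illustrative sketch, not part of the diff above ---
// The doc comment above refers to the filter-pushdown path this PR removes. The deleted
// `is_in` helper from intervals.rs (earlier in this diff) clipped the globally selected
// row intervals against a page's own row range. A self-contained restatement of that
// logic, with `Interval` as a local stand-in for the removed type:
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct Interval {
    start: usize,
    length: usize,
}

fn is_in(probe: Interval, intervals: &[Interval]) -> Vec<Interval> {
    intervals
        .iter()
        .filter_map(|interval| {
            let interval_end = interval.start + interval.length;
            let probe_end = probe.start + probe.length;
            // Keep only the overlapping part, re-based to the start of the page (`probe`).
            if probe.start < interval_end && probe_end > interval.start {
                let start = interval.start.max(probe.start);
                let end = interval_end.min(probe_end);
                Some(Interval {
                    start: start - probe.start,
                    length: end - start,
                })
            } else {
                None
            }
        })
        .collect()
}

fn main() {
    // A page covering rows 10..20 with rows 5..17 selected overall keeps its first 7 rows.
    let page = Interval { start: 10, length: 10 };
    let selected = [Interval { start: 5, length: 12 }];
    assert_eq!(is_in(page, &selected), vec![Interval { start: 0, length: 7 }]);
}
// --- end sketch ---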
@@ -159,14 +159,7 @@ impl PageReader { )); } - finish_page( - page_header, - buffer, - self.compression, - &self.descriptor, - None, - ) - .map(|p| { + finish_page(page_header, buffer, self.compression, &self.descriptor).map(|p| { if let CompressedPage::Dict(d) = p { Some(d) } else { @@ -234,14 +227,7 @@ pub(super) fn build_page(reader: &mut PageReader) -> ParquetResult>, ) -> ParquetResult { let type_ = page_header.type_.try_into()?; let uncompressed_page_size = page_header.uncompressed_page_size.try_into()?; @@ -302,7 +287,6 @@ pub(super) fn finish_page( compression, uncompressed_page_size, descriptor.clone(), - selected_rows, ))) }, PageType::DataPageV2 => { @@ -325,7 +309,6 @@ pub(super) fn finish_page( compression, uncompressed_page_size, descriptor.clone(), - selected_rows, ))) }, } diff --git a/crates/polars-parquet/src/parquet/read/page/stream.rs b/crates/polars-parquet/src/parquet/read/page/stream.rs index bc1ccb32880e..0101196f3752 100644 --- a/crates/polars-parquet/src/parquet/read/page/stream.rs +++ b/crates/polars-parquet/src/parquet/read/page/stream.rs @@ -100,7 +100,6 @@ fn _get_page_stream( MemSlice::from_vec(std::mem::take(&mut scratch)), compression, &descriptor, - None, )?; } } diff --git a/crates/polars-parquet/src/parquet/schema/io_message/from_message.rs b/crates/polars-parquet/src/parquet/schema/io_message/from_message.rs index 36da3d5edcd1..3098241d8425 100644 --- a/crates/polars-parquet/src/parquet/schema/io_message/from_message.rs +++ b/crates/polars-parquet/src/parquet/schema/io_message/from_message.rs @@ -158,9 +158,11 @@ fn type_from_str(s: &str) -> ParquetResult { } } -/// Parses message type as string into a Parquet [`ParquetType`](crate::parquet::schema::types::ParquetType) -/// which, for example, could be used to extract individual columns. Returns Parquet -/// general error when parsing or validation fails. +/// Parses message type as string into a Parquet [`ParquetType`](crate::parquet::schema::types::ParquetType). +/// +/// This could, for example, be used to extract individual columns. +/// +/// Returns Parquet general error when parsing or validation fails. 
pub fn from_message(message_type: &str) -> ParquetResult { let mut parser = Parser { tokenizer: &mut Tokenizer::from_str(message_type), diff --git a/crates/polars-parquet/src/parquet/write/compression.rs b/crates/polars-parquet/src/parquet/write/compression.rs index 1c7d4d36a901..04d01a6e34bc 100644 --- a/crates/polars-parquet/src/parquet/write/compression.rs +++ b/crates/polars-parquet/src/parquet/write/compression.rs @@ -16,9 +16,10 @@ fn compress_data( mut buffer, header, descriptor, - selected_rows, + num_rows, } = page; let uncompressed_page_size = buffer.len(); + let num_rows = num_rows.expect("We should have num_rows when we are writing"); if compression != CompressionOptions::Uncompressed { match &header { DataPageHeader::V1(_) => { @@ -40,13 +41,13 @@ fn compress_data( std::mem::swap(buffer.to_mut(), &mut compressed_buffer); } - Ok(CompressedDataPage::new_read( + Ok(CompressedDataPage::new( header, CowBuffer::Owned(compressed_buffer), compression.into(), uncompressed_page_size, descriptor, - selected_rows, + num_rows, )) } diff --git a/crates/polars-parquet/src/parquet/write/indexes/serialize.rs b/crates/polars-parquet/src/parquet/write/indexes/serialize.rs index 8b3cebec1686..14594bc2b8c4 100644 --- a/crates/polars-parquet/src/parquet/write/indexes/serialize.rs +++ b/crates/polars-parquet/src/parquet/write/indexes/serialize.rs @@ -62,11 +62,7 @@ pub fn serialize_offset_index(pages: &[PageWriteSpec]) -> ParquetResult, + /// The number of actual rows. For non-nested values, this is equal to the number of values. + pub num_rows: usize, pub header_size: u64, pub offset: u64, pub bytes_written: u64, @@ -55,7 +56,9 @@ pub fn write_page( compressed_page: &CompressedPage, ) -> ParquetResult { let num_values = compressed_page.num_values(); - let selected_rows = compressed_page.selected_rows(); + let num_rows = compressed_page + .num_rows() + .expect("We should have num_rows when we are writing"); let header = match &compressed_page { CompressedPage::Data(compressed_page) => assemble_data_page_header(compressed_page), @@ -88,8 +91,8 @@ pub fn write_page( bytes_written, compression: compressed_page.compression(), statistics, - num_rows: selected_rows.map(|x| x.last().unwrap().length), num_values, + num_rows, }) } @@ -101,7 +104,9 @@ pub async fn write_page_async( compressed_page: &CompressedPage, ) -> ParquetResult { let num_values = compressed_page.num_values(); - let selected_rows = compressed_page.selected_rows(); + let num_rows = compressed_page + .num_rows() + .expect("We should have the num_rows when we are writing"); let header = match &compressed_page { CompressedPage::Data(compressed_page) => assemble_data_page_header(compressed_page), @@ -134,7 +139,7 @@ pub async fn write_page_async( bytes_written, compression: compressed_page.compression(), statistics, - num_rows: selected_rows.map(|x| x.last().unwrap().length), + num_rows, num_values, }) } diff --git a/crates/polars-parquet/src/parquet/write/row_group.rs b/crates/polars-parquet/src/parquet/write/row_group.rs index e5c535055ea6..68c25a9c40fb 100644 --- a/crates/polars-parquet/src/parquet/write/row_group.rs +++ b/crates/polars-parquet/src/parquet/write/row_group.rs @@ -58,9 +58,7 @@ fn compute_num_rows(columns: &[(ColumnChunk, Vec)]) -> ParquetRes .iter() .filter(|x| is_data_page(x)) .try_for_each(|spec| { - num_rows += spec.num_rows.ok_or_else(|| { - ParquetError::oos("All data pages must declare the number of rows on it") - })? 
as i64; + num_rows += spec.num_rows as i64; ParquetResult::Ok(()) })?; ParquetResult::Ok(num_rows) diff --git a/crates/polars-pipe/src/executors/sources/csv.rs b/crates/polars-pipe/src/executors/sources/csv.rs index 383803f816ea..5155d7bdfcff 100644 --- a/crates/polars-pipe/src/executors/sources/csv.rs +++ b/crates/polars-pipe/src/executors/sources/csv.rs @@ -211,7 +211,7 @@ impl Source for CsvSource { if let Some(ca) = &mut self.include_file_path { if ca.len() < max_height { - *ca = ca.new_from_index(max_height, 0); + *ca = ca.new_from_index(0, max_height); }; for data_chunk in &mut out { diff --git a/crates/polars-pipe/src/executors/sources/parquet.rs b/crates/polars-pipe/src/executors/sources/parquet.rs index a897df0c2478..cd0cb58f3574 100644 --- a/crates/polars-pipe/src/executors/sources/parquet.rs +++ b/crates/polars-pipe/src/executors/sources/parquet.rs @@ -1,8 +1,10 @@ use std::collections::VecDeque; use std::ops::Range; use std::path::PathBuf; +use std::sync::atomic::AtomicUsize; use std::sync::Arc; +use futures::{StreamExt, TryStreamExt}; use polars_core::config::{self, get_file_prefetch_size}; use polars_core::error::*; use polars_core::prelude::Series; @@ -32,7 +34,7 @@ pub struct ParquetSource { batched_readers: VecDeque, n_threads: usize, processed_paths: usize, - processed_rows: usize, + processed_rows: AtomicUsize, iter: Range, paths: Arc>, options: ParquetOptions, @@ -110,11 +112,13 @@ impl ParquetSource { } fn init_reader_sync(&mut self) -> PolarsResult<()> { + use std::sync::atomic::Ordering; + let Some(index) = self.iter.next() else { return Ok(()); }; if let Some(slice) = self.file_options.slice { - if self.processed_rows >= slice.0 as usize + slice.1 { + if self.processed_rows.load(Ordering::Relaxed) >= slice.0 as usize + slice.1 { return Ok(()); } } @@ -147,20 +151,22 @@ impl ParquetSource { ); let n_rows_this_file = reader.num_rows().unwrap(); + let current_row_offset = self + .processed_rows + .fetch_add(n_rows_this_file, Ordering::Relaxed); let slice = file_options.slice.map(|slice| { assert!(slice.0 >= 0); let slice_start = slice.0 as usize; let slice_end = slice_start + slice.1; split_slice_at_file( - &mut self.processed_rows.clone(), + &mut current_row_offset.clone(), n_rows_this_file, slice_start, slice_end, ) }); - self.processed_rows += n_rows_this_file; reader = reader.with_slice(slice); reader.batched(chunk_size)? }; @@ -174,42 +180,64 @@ impl ParquetSource { Ok(()) } + /// This function must NOT be run concurrently if there is a slice (or any operation that + /// requires `self.processed_rows` to be incremented in the correct order), as it does not + /// coordinate to increment the row offset in a properly ordered manner. #[cfg(feature = "async")] async fn init_reader_async(&self, index: usize) -> PolarsResult { + use std::sync::atomic::Ordering; + let metadata = self.metadata.clone(); let predicate = self.predicate.clone(); let cloud_options = self.cloud_options.clone(); let (path, options, file_options, projection, chunk_size, hive_partitions) = self.prepare_init_reader(index)?; - assert_eq!(file_options.slice, None); - let batched_reader = { let uri = path.to_string_lossy(); - ParquetAsyncReader::from_uri(&uri, cloud_options.as_ref(), metadata) - .await? - .with_row_index(file_options.row_index) - .with_projection(projection) - .check_schema( - self.file_info - .reader_schema - .as_ref() - .unwrap() - .as_ref() - .unwrap_left(), - ) - .await? 
- .with_predicate(predicate.clone()) - .use_statistics(options.use_statistics) - .with_hive_partition_columns(hive_partitions) - .with_include_file_path( - self.file_options - .include_file_paths - .as_ref() - .map(|x| (x.clone(), Arc::from(path.to_str().unwrap()))), + + let mut async_reader = + ParquetAsyncReader::from_uri(&uri, cloud_options.as_ref(), metadata) + .await? + .with_row_index(file_options.row_index) + .with_projection(projection) + .check_schema( + self.file_info + .reader_schema + .as_ref() + .unwrap() + .as_ref() + .unwrap_left(), + ) + .await? + .with_predicate(predicate.clone()) + .use_statistics(options.use_statistics) + .with_hive_partition_columns(hive_partitions) + .with_include_file_path( + self.file_options + .include_file_paths + .as_ref() + .map(|x| (x.clone(), Arc::from(path.to_str().unwrap()))), + ); + + let n_rows_this_file = async_reader.num_rows().await?; + let current_row_offset = self + .processed_rows + .fetch_add(n_rows_this_file, Ordering::Relaxed); + + let slice = file_options.slice.map(|slice| { + assert!(slice.0 >= 0); + let slice_start = slice.0 as usize; + let slice_end = slice_start + slice.1; + split_slice_at_file( + &mut current_row_offset.clone(), + n_rows_this_file, + slice_start, + slice_end, ) - .batched(chunk_size) - .await? + }); + + async_reader.with_slice(slice).batched(chunk_size).await? }; Ok(batched_reader) } @@ -241,7 +269,7 @@ impl ParquetSource { batched_readers: VecDeque::new(), n_threads, processed_paths: 0, - processed_rows: 0, + processed_rows: AtomicUsize::new(0), options, file_options, iter, @@ -269,29 +297,36 @@ impl ParquetSource { // // It is important we do this for a reasonable batch size, that's why we start this when we // have just 2 readers left. - if self.file_options.slice.is_none() - && self.run_async - && (self.batched_readers.len() <= 2 || self.batched_readers.is_empty()) - { + if self.run_async { #[cfg(not(feature = "async"))] panic!("activate 'async' feature"); #[cfg(feature = "async")] { - let range = 0..self.prefetch_size - self.batched_readers.len(); - let range = range - .zip(&mut self.iter) - .map(|(_, index)| index) - .collect::>(); - let init_iter = range.into_iter().map(|index| self.init_reader_async(index)); - - let batched_readers = - polars_io::pl_async::get_runtime().block_on_potential_spawn(async { - futures::future::try_join_all(init_iter).await - })?; - - for r in batched_readers { - self.finish_init_reader(r)?; + if self.batched_readers.len() <= 2 || self.batched_readers.is_empty() { + let range = 0..self.prefetch_size - self.batched_readers.len(); + let range = range + .zip(&mut self.iter) + .map(|(_, index)| index) + .collect::>(); + let init_iter = range.into_iter().map(|index| self.init_reader_async(index)); + + let batched_readers = if self.file_options.slice.is_some() { + polars_io::pl_async::get_runtime().block_on_potential_spawn(async { + futures::stream::iter(init_iter) + .then(|x| x) + .try_collect() + .await + })? + } else { + polars_io::pl_async::get_runtime().block_on_potential_spawn(async { + futures::future::try_join_all(init_iter).await + })? 
+ }; + + for r in batched_readers { + self.finish_init_reader(r)?; + } } } } else { diff --git a/crates/polars-plan/Cargo.toml b/crates/polars-plan/Cargo.toml index 28b867c8580c..5d1cdc79ab15 100644 --- a/crates/polars-plan/Cargo.toml +++ b/crates/polars-plan/Cargo.toml @@ -184,6 +184,7 @@ offset_by = ["polars-time/offset_by"] bigidx = ["polars-core/bigidx"] polars_cloud = ["serde", "ciborium"] +ir_serde = ["serde", "polars-utils/ir_serde"] panic_on_schema = [] diff --git a/crates/polars-plan/src/client/mod.rs b/crates/polars-plan/src/client/mod.rs index a815babcc6ad..f5a5cdb0f763 100644 --- a/crates/polars-plan/src/client/mod.rs +++ b/crates/polars-plan/src/client/mod.rs @@ -1,38 +1,18 @@ mod check; -use std::sync::Arc; +use arrow::legacy::error::to_compute_err; +use polars_core::error::PolarsResult; -use polars_core::error::{polars_ensure, polars_err, PolarsResult}; -use polars_io::parquet::write::ParquetWriteOptions; -use polars_io::path_utils::is_cloud_url; - -use crate::plans::options::{FileType, SinkType}; use crate::plans::DslPlan; /// Prepare the given [`DslPlan`] for execution on Polars Cloud. -pub fn prepare_cloud_plan(dsl: DslPlan, uri: String) -> PolarsResult> { +pub fn prepare_cloud_plan(dsl: DslPlan) -> PolarsResult> { // Check the plan for cloud eligibility. check::assert_cloud_eligible(&dsl)?; - // Add Sink node. - polars_ensure!( - is_cloud_url(&uri), - InvalidOperation: "non-cloud paths not supported: {uri}" - ); - let sink_type = SinkType::Cloud { - uri: Arc::new(uri), - file_type: FileType::Parquet(ParquetWriteOptions::default()), - cloud_options: None, - }; - let dsl = DslPlan::Sink { - input: Arc::new(dsl), - payload: sink_type, - }; - // Serialize the plan. let mut writer = Vec::new(); - ciborium::into_writer(&dsl, &mut writer) - .map_err(|err| polars_err!(ComputeError: err.to_string()))?; + ciborium::into_writer(&dsl, &mut writer).map_err(to_compute_err)?; Ok(writer) } diff --git a/crates/polars-plan/src/dsl/expr.rs b/crates/polars-plan/src/dsl/expr.rs index 089813e494a8..da6accf80c6f 100644 --- a/crates/polars-plan/src/dsl/expr.rs +++ b/crates/polars-plan/src/dsl/expr.rs @@ -61,9 +61,11 @@ impl AsRef for AggExpr { } } -/// Expressions that can be used in various contexts. Queries consist of multiple expressions. When using the polars -/// lazy API, don't construct an `Expr` directly; instead, create one using the functions in the `polars_lazy::dsl` -/// module. See that module's docs for more info. +/// Expressions that can be used in various contexts. +/// +/// Queries consist of multiple expressions. +/// When using the polars lazy API, don't construct an `Expr` directly; instead, create one using +/// the functions in the `polars_lazy::dsl` module. See that module's docs for more info. 
#[derive(Clone, PartialEq)] #[must_use] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] diff --git a/crates/polars-plan/src/dsl/function_expr/datetime.rs b/crates/polars-plan/src/dsl/function_expr/datetime.rs index 8e2db0ec8134..604c915c817a 100644 --- a/crates/polars-plan/src/dsl/function_expr/datetime.rs +++ b/crates/polars-plan/src/dsl/function_expr/datetime.rs @@ -311,24 +311,31 @@ pub(super) fn microsecond(s: &Series) -> PolarsResult { pub(super) fn nanosecond(s: &Series) -> PolarsResult { s.nanosecond().map(|ca| ca.into_series()) } +#[cfg(feature = "dtype-duration")] pub(super) fn total_days(s: &Series) -> PolarsResult { s.duration().map(|ca| ca.days().into_series()) } +#[cfg(feature = "dtype-duration")] pub(super) fn total_hours(s: &Series) -> PolarsResult { s.duration().map(|ca| ca.hours().into_series()) } +#[cfg(feature = "dtype-duration")] pub(super) fn total_minutes(s: &Series) -> PolarsResult { s.duration().map(|ca| ca.minutes().into_series()) } +#[cfg(feature = "dtype-duration")] pub(super) fn total_seconds(s: &Series) -> PolarsResult { s.duration().map(|ca| ca.seconds().into_series()) } +#[cfg(feature = "dtype-duration")] pub(super) fn total_milliseconds(s: &Series) -> PolarsResult { s.duration().map(|ca| ca.milliseconds().into_series()) } +#[cfg(feature = "dtype-duration")] pub(super) fn total_microseconds(s: &Series) -> PolarsResult { s.duration().map(|ca| ca.microseconds().into_series()) } +#[cfg(feature = "dtype-duration")] pub(super) fn total_nanoseconds(s: &Series) -> PolarsResult { s.duration().map(|ca| ca.nanoseconds().into_series()) } diff --git a/crates/polars-plan/src/dsl/functions/correlation.rs b/crates/polars-plan/src/dsl/functions/correlation.rs index bb0fc5aa3cf1..dd7521ad20a9 100644 --- a/crates/polars-plan/src/dsl/functions/correlation.rs +++ b/crates/polars-plan/src/dsl/functions/correlation.rs @@ -79,11 +79,15 @@ pub fn rolling_corr(x: Expr, y: Expr, options: RollingCovOptions) -> Expr { ..Default::default() }; + let non_null_mask = when(x.clone().is_not_null().and(y.clone().is_not_null())) + .then(lit(1.0)) + .otherwise(lit(Null {})); + let mean_x_y = (x.clone() * y.clone()).rolling_mean(rolling_options.clone()); - let mean_x = x.clone().rolling_mean(rolling_options.clone()); - let mean_y = y.clone().rolling_mean(rolling_options.clone()); - let var_x = x.clone().rolling_var(rolling_options.clone()); - let var_y = y.clone().rolling_var(rolling_options); + let mean_x = (x.clone() * non_null_mask.clone()).rolling_mean(rolling_options.clone()); + let mean_y = (y.clone() * non_null_mask.clone()).rolling_mean(rolling_options.clone()); + let var_x = (x.clone() * non_null_mask.clone()).rolling_var(rolling_options.clone()); + let var_y = (y.clone() * non_null_mask.clone()).rolling_var(rolling_options); let rolling_options_count = RollingOptionsFixedWindow { window_size: options.window_size as usize, @@ -110,9 +114,13 @@ pub fn rolling_cov(x: Expr, y: Expr, options: RollingCovOptions) -> Expr { ..Default::default() }; + let non_null_mask = when(x.clone().is_not_null().and(y.clone().is_not_null())) + .then(lit(1.0)) + .otherwise(lit(Null {})); + let mean_x_y = (x.clone() * y.clone()).rolling_mean(rolling_options.clone()); - let mean_x = x.clone().rolling_mean(rolling_options.clone()); - let mean_y = y.clone().rolling_mean(rolling_options); + let mean_x = (x.clone() * non_null_mask.clone()).rolling_mean(rolling_options.clone()); + let mean_y = (y.clone() * non_null_mask.clone()).rolling_mean(rolling_options); let rolling_options_count = 
RollingOptionsFixedWindow { window_size: options.window_size as usize, min_periods: 0, diff --git a/crates/polars-plan/src/dsl/functions/index.rs b/crates/polars-plan/src/dsl/functions/index.rs index d125ce571307..7a452a033245 100644 --- a/crates/polars-plan/src/dsl/functions/index.rs +++ b/crates/polars-plan/src/dsl/functions/index.rs @@ -1,6 +1,7 @@ use super::*; /// Find the indexes that would sort these series in order of appearance. +/// /// That means that the first `Series` will be used to determine the ordering /// until duplicates are found. Once duplicates are found, the next `Series` will /// be used and so on. diff --git a/crates/polars-plan/src/dsl/functions/repeat.rs b/crates/polars-plan/src/dsl/functions/repeat.rs index 1b32abc97b5a..9da42c36242f 100644 --- a/crates/polars-plan/src/dsl/functions/repeat.rs +++ b/crates/polars-plan/src/dsl/functions/repeat.rs @@ -1,8 +1,9 @@ use super::*; -/// Create a column of length `n` containing `n` copies of the literal `value`. Generally you won't need this function, -/// as `lit(value)` already represents a column containing only `value` whose length is automatically set to the correct -/// number of rows. +/// Create a column of length `n` containing `n` copies of the literal `value`. +/// +/// Generally you won't need this function, as `lit(value)` already represents a column containing +/// only `value` whose length is automatically set to the correct number of rows. pub fn repeat>(value: E, n: Expr) -> Expr { let function = |s: Series, n: Series| { polars_ensure!( diff --git a/crates/polars-plan/src/dsl/mod.rs b/crates/polars-plan/src/dsl/mod.rs index 16c1fcb6fd85..166c7f5e7962 100644 --- a/crates/polars-plan/src/dsl/mod.rs +++ b/crates/polars-plan/src/dsl/mod.rs @@ -1707,12 +1707,20 @@ impl Expr { /// Get maximal value that could be hold by this dtype. pub fn upper_bound(self) -> Expr { - self.map_private(FunctionExpr::UpperBound) + self.apply_private(FunctionExpr::UpperBound) + .with_function_options(|mut options| { + options.flags |= FunctionFlags::RETURNS_SCALAR; + options + }) } /// Get minimal value that could be hold by this dtype. pub fn lower_bound(self) -> Expr { - self.map_private(FunctionExpr::LowerBound) + self.apply_private(FunctionExpr::LowerBound) + .with_function_options(|mut options| { + options.flags |= FunctionFlags::RETURNS_SCALAR; + options + }) } pub fn reshape(self, dimensions: &[i64], nested_type: NestedType) -> Self { diff --git a/crates/polars-plan/src/plans/aexpr/mod.rs b/crates/polars-plan/src/plans/aexpr/mod.rs index 9e14563303b7..49e4a94a62a0 100644 --- a/crates/polars-plan/src/plans/aexpr/mod.rs +++ b/crates/polars-plan/src/plans/aexpr/mod.rs @@ -11,6 +11,8 @@ use polars_core::chunked_array::cast::CastOptions; use polars_core::prelude::*; use polars_core::utils::{get_time_units, try_get_supertype}; use polars_utils::arena::{Arena, Node}; +#[cfg(feature = "ir_serde")] +use serde::{Deserialize, Serialize}; use strum_macros::IntoStaticStr; pub use utils::*; @@ -19,6 +21,7 @@ use crate::plans::Context; use crate::prelude::*; #[derive(Clone, Debug, IntoStaticStr)] +#[cfg_attr(feature = "ir_serde", derive(Serialize, Deserialize))] pub enum IRAggExpr { Min { input: Node, @@ -125,6 +128,7 @@ impl From for GroupByMethod { /// IR expression node that is allocated in an [`Arena`][polars_utils::arena::Arena]. 
#[derive(Clone, Debug, Default)] +#[cfg_attr(feature = "ir_serde", derive(Serialize, Deserialize))] pub enum AExpr { Explode(Node), Alias(Node, ColumnName), @@ -164,6 +168,7 @@ pub enum AExpr { truthy: Node, falsy: Node, }, + #[cfg_attr(feature = "ir_serde", serde(skip))] AnonymousFunction { input: Vec, function: SpecialEq>, diff --git a/crates/polars-plan/src/plans/builder_dsl.rs b/crates/polars-plan/src/plans/builder_dsl.rs index 82fa483643d7..849be4a6e4ea 100644 --- a/crates/polars-plan/src/plans/builder_dsl.rs +++ b/crates/polars-plan/src/plans/builder_dsl.rs @@ -346,10 +346,13 @@ impl DslBuilder { .into() } - pub fn explode(self, columns: Vec) -> Self { + pub fn explode(self, columns: Vec, allow_empty: bool) -> Self { DslPlan::MapFunction { input: Arc::new(self.0), - function: DslFunction::Explode { columns }, + function: DslFunction::Explode { + columns, + allow_empty, + }, } .into() } @@ -442,7 +445,7 @@ impl DslBuilder { function: F, optimizations: AllowedOptimizations, schema: Option>, - name: &'static str, + name: &str, ) -> Self where F: DataFrameUdf + 'static, @@ -457,7 +460,7 @@ impl DslBuilder { predicate_pd: optimizations.contains(OptState::PREDICATE_PUSHDOWN), projection_pd: optimizations.contains(OptState::PROJECTION_PUSHDOWN), streamable: optimizations.contains(OptState::STREAMING), - fmt_str: name, + fmt_str: name.into(), }), } .into() diff --git a/crates/polars-plan/src/plans/conversion/dsl_to_ir.rs b/crates/polars-plan/src/plans/conversion/dsl_to_ir.rs index 8a92d90ffa1c..08b3a8b66d1e 100644 --- a/crates/polars-plan/src/plans/conversion/dsl_to_ir.rs +++ b/crates/polars-plan/src/plans/conversion/dsl_to_ir.rs @@ -636,6 +636,23 @@ pub fn to_alp_impl( let input_schema = lp_arena.get(input).schema(lp_arena); match function { + DslFunction::Explode { + columns, + allow_empty, + } => { + let columns = expand_selectors(columns, &input_schema, &[])?; + validate_columns_in_input(&columns, &input_schema, "explode")?; + polars_ensure!(!columns.is_empty() || allow_empty, InvalidOperation: "no columns provided in explode"); + if columns.is_empty() { + return Ok(input); + } + let function = FunctionIR::Explode { + columns, + schema: Default::default(), + }; + let ir = IR::MapFunction { input, function }; + return Ok(lp_arena.add(ir)); + }, DslFunction::FillNan(fill_value) => { let exprs = input_schema .iter() @@ -794,8 +811,11 @@ pub fn to_alp_impl( IR::Sink { input, payload } }, DslPlan::IR { node, dsl, version } => { - return if let (true, Some(node)) = (version == lp_arena.version(), node) { - Ok(node) + return if node.is_some() + && version == lp_arena.version() + && convert.used_arenas.insert(version) + { + Ok(node.unwrap()) } else { to_alp_impl(owned(dsl), expr_arena, lp_arena, convert) } diff --git a/crates/polars-plan/src/plans/conversion/mod.rs b/crates/polars-plan/src/plans/conversion/mod.rs index 250a44d1f524..8d7e232c4cd7 100644 --- a/crates/polars-plan/src/plans/conversion/mod.rs +++ b/crates/polars-plan/src/plans/conversion/mod.rs @@ -3,7 +3,12 @@ mod dsl_to_ir; mod expr_expansion; mod expr_to_ir; mod ir_to_dsl; -#[cfg(any(feature = "ipc", feature = "parquet", feature = "csv"))] +#[cfg(any( + feature = "ipc", + feature = "parquet", + feature = "csv", + feature = "json" +))] mod scans; mod stack_opt; diff --git a/crates/polars-plan/src/plans/conversion/scans.rs b/crates/polars-plan/src/plans/conversion/scans.rs index 959327148f6c..308e9b9d0511 100644 --- a/crates/polars-plan/src/plans/conversion/scans.rs +++ b/crates/polars-plan/src/plans/conversion/scans.rs @@ -40,7 
+40,7 @@ fn prepare_schemas(mut schema: Schema, row_index: Option<&RowIndex>) -> (SchemaR pub(super) fn parquet_file_info( paths: &[PathBuf], file_options: &FileScanOptions, - cloud_options: Option<&polars_io::cloud::CloudOptions>, + #[allow(unused)] cloud_options: Option<&polars_io::cloud::CloudOptions>, ) -> PolarsResult<(FileInfo, Option)> { let path = get_first_path(paths)?; diff --git a/crates/polars-plan/src/plans/conversion/stack_opt.rs b/crates/polars-plan/src/plans/conversion/stack_opt.rs index 6e05a872a8cf..8db4e82659d5 100644 --- a/crates/polars-plan/src/plans/conversion/stack_opt.rs +++ b/crates/polars-plan/src/plans/conversion/stack_opt.rs @@ -7,6 +7,12 @@ pub(super) struct ConversionOptimizer { scratch: Vec, simplify: Option, coerce: Option, + // IR's can be cached in the DSL. + // But if they are used multiple times in DSL (e.g. concat/join) + // then it can occur that we take a slot multiple times. + // So we keep track of the arena versions used and allow only + // one unique IR cache to be reused. + pub(super) used_arenas: PlHashSet, } impl ConversionOptimizer { @@ -27,6 +33,7 @@ impl ConversionOptimizer { scratch: Vec::with_capacity(8), simplify, coerce, + used_arenas: Default::default(), } } diff --git a/crates/polars-plan/src/plans/conversion/type_coercion/mod.rs b/crates/polars-plan/src/plans/conversion/type_coercion/mod.rs index 9bbc614ca088..a8c2122f9b65 100644 --- a/crates/polars-plan/src/plans/conversion/type_coercion/mod.rs +++ b/crates/polars-plan/src/plans/conversion/type_coercion/mod.rs @@ -513,7 +513,7 @@ fn inline_or_prune_cast( }, // We generate casted literal datetimes, so ensure we cast upon conversion // to create simpler expr trees. - #[cfg(feature = "temporal")] + #[cfg(feature = "dtype-datetime")] LiteralValue::DateTime(ts, tu, None) if dtype.is_date() => { let from_size = time_unit_multiple(tu.to_arrow()) * SECONDS_IN_DAY; LiteralValue::Date((*ts / from_size) as i32) diff --git a/crates/polars-plan/src/plans/expr_ir.rs b/crates/polars-plan/src/plans/expr_ir.rs index 1161406a44b9..d9c0886c201c 100644 --- a/crates/polars-plan/src/plans/expr_ir.rs +++ b/crates/polars-plan/src/plans/expr_ir.rs @@ -3,10 +3,14 @@ use std::hash::Hash; #[cfg(feature = "cse")] use std::hash::Hasher; +#[cfg(feature = "ir_serde")] +use serde::{Deserialize, Serialize}; + use super::*; use crate::constants::{get_len_name, LITERAL_NAME}; #[derive(Default, Debug, Clone, Hash, PartialEq, Eq)] +#[cfg_attr(feature = "ir_serde", derive(Serialize, Deserialize))] pub enum OutputName { /// No not yet set. #[default] @@ -23,7 +27,7 @@ pub enum OutputName { } impl OutputName { - fn unwrap(&self) -> &ColumnName { + pub fn unwrap(&self) -> &ColumnName { match self { OutputName::Alias(name) => name, OutputName::ColumnLhs(name) => name, @@ -40,6 +44,7 @@ impl OutputName { } #[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "ir_serde", derive(Serialize, Deserialize))] pub struct ExprIR { /// Output name of this expression. 
output_name: OutputName, @@ -146,7 +151,7 @@ impl ExprIR { self.output_name = OutputName::Alias(name) } - pub(crate) fn output_name_inner(&self) -> &OutputName { + pub fn output_name_inner(&self) -> &OutputName { &self.output_name } diff --git a/crates/polars-plan/src/plans/functions/count.rs b/crates/polars-plan/src/plans/functions/count.rs index fd92fdd9fc9d..d00f19e36f8a 100644 --- a/crates/polars-plan/src/plans/functions/count.rs +++ b/crates/polars-plan/src/plans/functions/count.rs @@ -1,17 +1,19 @@ #[cfg(feature = "ipc")] use arrow::io::ipc::read::get_row_count as count_rows_ipc_sync; -#[cfg(feature = "parquet")] +#[cfg(any(feature = "parquet", feature = "json"))] use polars_io::cloud::CloudOptions; #[cfg(feature = "csv")] use polars_io::csv::read::count_rows as count_rows_csv; +#[cfg(any(feature = "parquet", feature = "ipc", feature = "json"))] +use polars_io::is_cloud_url; #[cfg(all(feature = "parquet", feature = "cloud"))] use polars_io::parquet::read::ParquetAsyncReader; #[cfg(feature = "parquet")] use polars_io::parquet::read::ParquetReader; #[cfg(all(feature = "parquet", feature = "async"))] use polars_io::pl_async::{get_runtime, with_concurrency_budget}; -#[cfg(any(feature = "parquet", feature = "ipc"))] -use polars_io::{path_utils::is_cloud_url, SerReader}; +#[cfg(any(feature = "json", feature = "parquet"))] +use polars_io::SerReader; use super::*; @@ -90,7 +92,7 @@ pub fn count_rows(paths: &Arc>, scan_type: &FileScan) -> PolarsResu #[cfg(feature = "parquet")] pub(super) fn count_rows_parquet( paths: &Arc>, - cloud_options: Option<&CloudOptions>, + #[allow(unused)] cloud_options: Option<&CloudOptions>, ) -> PolarsResult { if paths.is_empty() { return Ok(0); @@ -189,6 +191,7 @@ pub(super) fn count_rows_ndjson( cloud_options: Option<&CloudOptions>, ) -> PolarsResult { use polars_core::config; + use polars_io::utils::maybe_decompress_bytes; let run_async = !paths.is_empty() && is_cloud_url(&paths[0]) || config::force_async(); @@ -233,7 +236,12 @@ pub(super) fn count_rows_ndjson( polars_utils::open_file(&paths[i])? 
}; - let reader = polars_io::ndjson::core::JsonLineReader::new(f); + let mmap = unsafe { memmap::Mmap::map(&f).unwrap() }; + let owned = &mut vec![]; + + let reader = polars_io::ndjson::core::JsonLineReader::new(std::io::Cursor::new( + maybe_decompress_bytes(mmap.as_ref(), owned)?, + )); reader.count() }) .sum() diff --git a/crates/polars-plan/src/plans/functions/dsl.rs b/crates/polars-plan/src/plans/functions/dsl.rs index 76c7dc9d3211..458c7c6d8e28 100644 --- a/crates/polars-plan/src/plans/functions/dsl.rs +++ b/crates/polars-plan/src/plans/functions/dsl.rs @@ -29,6 +29,7 @@ pub enum DslFunction { OpaquePython(OpaquePythonUdf), Explode { columns: Vec, + allow_empty: bool, }, #[cfg(feature = "pivot")] Unpivot { @@ -79,7 +80,7 @@ pub enum StatsFunction { Max, } -fn validate_columns>( +pub(crate) fn validate_columns_in_input>( columns: &[S], input_schema: &Schema, operation_name: &str, @@ -93,20 +94,12 @@ fn validate_columns>( impl DslFunction { pub(crate) fn into_function_ir(self, input_schema: &Schema) -> PolarsResult { let function = match self { - DslFunction::Explode { columns } => { - let columns = expand_selectors(columns, input_schema, &[])?; - validate_columns(columns.as_ref(), input_schema, "explode")?; - FunctionIR::Explode { - columns, - schema: Default::default(), - } - }, #[cfg(feature = "pivot")] DslFunction::Unpivot { args } => { let on = expand_selectors(args.on, input_schema, &[])?; let index = expand_selectors(args.index, input_schema, &[])?; - validate_columns(on.as_ref(), input_schema, "unpivot")?; - validate_columns(index.as_ref(), input_schema, "unpivot")?; + validate_columns_in_input(on.as_ref(), input_schema, "unpivot")?; + validate_columns_in_input(index.as_ref(), input_schema, "unpivot")?; let args = UnpivotArgsIR { on: on.iter().map(|s| s.as_ref().into()).collect(), @@ -128,7 +121,7 @@ impl DslFunction { }, DslFunction::Rename { existing, new } => { let swapping = new.iter().any(|name| input_schema.get(name).is_some()); - validate_columns(existing.as_ref(), input_schema, "rename")?; + validate_columns_in_input(existing.as_ref(), input_schema, "rename")?; FunctionIR::Rename { existing, @@ -139,12 +132,15 @@ impl DslFunction { }, DslFunction::Unnest(selectors) => { let columns = expand_selectors(selectors, input_schema, &[])?; - validate_columns(columns.as_ref(), input_schema, "explode")?; + validate_columns_in_input(columns.as_ref(), input_schema, "explode")?; FunctionIR::Unnest { columns } }, #[cfg(feature = "python")] DslFunction::OpaquePython(inner) => FunctionIR::OpaquePython(inner), - DslFunction::Stats(_) | DslFunction::FillNan(_) | DslFunction::Drop(_) => { + DslFunction::Stats(_) + | DslFunction::FillNan(_) + | DslFunction::Drop(_) + | DslFunction::Explode { .. } => { // We should not reach this. 
panic!("impl error") }, diff --git a/crates/polars-plan/src/plans/functions/mod.rs b/crates/polars-plan/src/plans/functions/mod.rs index fb3edbe12bd3..4e9f42f205ba 100644 --- a/crates/polars-plan/src/plans/functions/mod.rs +++ b/crates/polars-plan/src/plans/functions/mod.rs @@ -26,11 +26,13 @@ use crate::dsl::python_udf::PythonFunction; use crate::plans::functions::merge_sorted::merge_sorted; use crate::prelude::*; +#[cfg_attr(feature = "ir_serde", derive(Serialize, Deserialize))] #[derive(Clone, IntoStaticStr)] #[strum(serialize_all = "SCREAMING_SNAKE_CASE")] pub enum FunctionIR { #[cfg(feature = "python")] OpaquePython(OpaquePythonUdf), + #[cfg_attr(feature = "ir_serde", serde(skip))] Opaque { function: Arc, schema: Option>, @@ -40,7 +42,7 @@ pub enum FunctionIR { projection_pd: bool, streamable: bool, // used for formatting - fmt_str: &'static str, + fmt_str: String, }, FastCount { paths: Arc>, @@ -48,6 +50,7 @@ pub enum FunctionIR { alias: Option>, }, /// Streaming engine pipeline + #[cfg_attr(feature = "ir_serde", serde(skip))] Pipeline { function: Arc>, schema: SchemaRef, @@ -71,20 +74,24 @@ pub enum FunctionIR { new: Arc<[SmartString]>, // A column name gets swapped with an existing column swapping: bool, + #[cfg_attr(feature = "ir_serde", serde(skip))] schema: CachedSchema, }, Explode { columns: Arc<[ColumnName]>, + #[cfg_attr(feature = "ir_serde", serde(skip))] schema: CachedSchema, }, #[cfg(feature = "pivot")] Unpivot { args: Arc, + #[cfg_attr(feature = "ir_serde", serde(skip))] schema: CachedSchema, }, RowIndex { name: Arc, // Might be cached. + #[cfg_attr(feature = "ir_serde", serde(skip))] schema: CachedSchema, offset: Option, }, diff --git a/crates/polars-plan/src/plans/ir/dot.rs b/crates/polars-plan/src/plans/ir/dot.rs index 49e9bef1a3dc..8fb6dbe5444d 100644 --- a/crates/polars-plan/src/plans/ir/dot.rs +++ b/crates/polars-plan/src/plans/ir/dot.rs @@ -32,9 +32,9 @@ impl fmt::Display for DotNode { #[inline(always)] fn write_label<'a, 'b>( - f: &'b mut fmt::Formatter<'a>, + f: &'a mut fmt::Formatter<'b>, id: DotNode, - mut w: impl FnMut(&mut EscapeLabel<'a, 'b>) -> fmt::Result, + mut w: impl FnMut(&mut EscapeLabel<'a>) -> fmt::Result, ) -> fmt::Result { write!(f, "{INDENT}{id}[label=\"")?; @@ -341,7 +341,7 @@ impl<'a> IRDotDisplay<'a> { } // A few utility structures for formatting -pub(crate) struct PathsDisplay<'a>(pub &'a [PathBuf]); +pub struct PathsDisplay<'a>(pub &'a [PathBuf]); struct NumColumns<'a>(Option<&'a [String]>); struct NumColumnsSchema<'a>(Option<&'a Schema>); struct OptionExprIRDisplay<'a>(Option>); @@ -390,9 +390,9 @@ impl fmt::Display for OptionExprIRDisplay<'_> { } /// Utility structure to write to a [`fmt::Formatter`] whilst escaping the output as a label name -struct EscapeLabel<'a, 'b>(&'b mut fmt::Formatter<'a>); +pub struct EscapeLabel<'a>(pub &'a mut dyn fmt::Write); -impl<'a, 'b> fmt::Write for EscapeLabel<'a, 'b> { +impl<'a> fmt::Write for EscapeLabel<'a> { fn write_str(&mut self, mut s: &str) -> fmt::Result { loop { let mut char_indices = s.char_indices(); diff --git a/crates/polars-plan/src/plans/ir/mod.rs b/crates/polars-plan/src/plans/ir/mod.rs index 8d30639f1fe1..b8b0378419d6 100644 --- a/crates/polars-plan/src/plans/ir/mod.rs +++ b/crates/polars-plan/src/plans/ir/mod.rs @@ -8,12 +8,14 @@ use std::borrow::Cow; use std::fmt; use std::path::PathBuf; -pub use dot::IRDotDisplay; +pub use dot::{EscapeLabel, IRDotDisplay, PathsDisplay}; pub use format::{ExprIRDisplay, IRDisplay}; use hive::HivePartitions; use polars_core::prelude::*; use 
polars_utils::idx_vec::UnitVec; use polars_utils::unitvec; +#[cfg(feature = "ir_serde")] +use serde::{Deserialize, Serialize}; use crate::prelude::*; @@ -33,6 +35,7 @@ pub struct IRPlanRef<'a> { /// [`IR`] is a representation of [`DslPlan`] with [`Node`]s which are allocated in an [`Arena`] /// In this IR the logical plan has access to the full dataset. #[derive(Clone, Debug, Default)] +#[cfg_attr(feature = "ir_serde", derive(Serialize, Deserialize))] pub enum IR { #[cfg(feature = "python")] PythonScan { @@ -105,6 +108,7 @@ pub enum IR { keys: Vec, aggs: Vec, schema: SchemaRef, + #[cfg_attr(feature = "ir_serde", serde(skip))] apply: Option>, maintain_order: bool, options: Arc, diff --git a/crates/polars-plan/src/plans/ir/schema.rs b/crates/polars-plan/src/plans/ir/schema.rs index 5b5042e50377..1586463a8c0f 100644 --- a/crates/polars-plan/src/plans/ir/schema.rs +++ b/crates/polars-plan/src/plans/ir/schema.rs @@ -107,4 +107,60 @@ impl IR { }; Cow::Borrowed(schema) } + + /// Get the schema of the logical plan node, using caching. + #[recursive] + pub fn schema_with_cache<'a>( + node: Node, + arena: &'a Arena, + cache: &mut PlHashMap>, + ) -> Arc { + use IR::*; + if let Some(schema) = cache.get(&node) { + return schema.clone(); + } + + let schema = match arena.get(node) { + #[cfg(feature = "python")] + PythonScan { options } => options + .output_schema + .as_ref() + .unwrap_or(&options.schema) + .clone(), + Union { inputs, .. } => IR::schema_with_cache(inputs[0], arena, cache), + HConcat { schema, .. } => schema.clone(), + Cache { input, .. } + | Sort { input, .. } + | Filter { input, .. } + | Distinct { input, .. } + | Sink { input, .. } + | Slice { input, .. } => IR::schema_with_cache(*input, arena, cache), + Scan { + output_schema, + file_info, + .. + } => output_schema.as_ref().unwrap_or(&file_info.schema).clone(), + DataFrameScan { + schema, + output_schema, + .. + } => output_schema.as_ref().unwrap_or(schema).clone(), + Select { schema, .. } + | Reduce { schema, .. } + | GroupBy { schema, .. } + | Join { schema, .. } + | HStack { schema, .. } + | ExtContext { schema, .. } + | SimpleProjection { + columns: schema, .. + } => schema.clone(), + MapFunction { input, function } => { + let input_schema = IR::schema_with_cache(*input, arena, cache); + function.schema(&input_schema).unwrap().into_owned() + }, + Invalid => unreachable!(), + }; + cache.insert(node, schema.clone()); + schema + } } diff --git a/crates/polars-plan/src/plans/mod.rs b/crates/polars-plan/src/plans/mod.rs index ac8dff3e90fd..6967e743f5b3 100644 --- a/crates/polars-plan/src/plans/mod.rs +++ b/crates/polars-plan/src/plans/mod.rs @@ -213,7 +213,7 @@ impl Clone for DslPlan { impl Default for DslPlan { fn default() -> Self { - let df = DataFrame::new::(vec![]).unwrap(); + let df = DataFrame::empty(); let schema = df.schema(); DslPlan::DataFrameScan { df: Arc::new(df), diff --git a/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs b/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs index 33edd4b6ed8f..f62bd9ee197d 100644 --- a/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs +++ b/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs @@ -413,7 +413,8 @@ impl SlicePushDown { // [Pushdown] // these nodes will be pushed down. 
// State is None, we can continue - m @(Select {..}, None) + m @(Select {..}, None) | + m @ (SimpleProjection {..}, _) => { let (lp, state) = m; self.pushdown_and_continue(lp, state, lp_arena, expr_arena) @@ -431,14 +432,14 @@ impl SlicePushDown { } } (HStack {input, exprs, schema, options}, _) => { - let check = can_pushdown_slice_past_projections(&exprs, expr_arena); + let (can_pushdown, all_elementwise_and_any_expr_has_column) = can_pushdown_slice_past_projections(&exprs, expr_arena); if ( - // If the schema length is greater then an input column is being projected, so + // If the schema length is greater than an input column is being projected, so // the exprs in with_columns do not need to have an input column name. - schema.len() > exprs.len() && check.0 + schema.len() > exprs.len() && can_pushdown ) - || check.1 // e.g. select(c).with_columns(c = c + 1) + || all_elementwise_and_any_expr_has_column // e.g. select(c).with_columns(c = c + 1) { let lp = HStack {input, exprs, schema, options}; self.pushdown_and_continue(lp, state, lp_arena, expr_arena) diff --git a/crates/polars-plan/src/plans/options.rs b/crates/polars-plan/src/plans/options.rs index 0cff24124ff1..85506b7f6a15 100644 --- a/crates/polars-plan/src/plans/options.rs +++ b/crates/polars-plan/src/plans/options.rs @@ -85,6 +85,7 @@ pub struct DistinctOptionsDSL { } #[derive(Clone, Debug, Eq, PartialEq, Hash)] +#[cfg_attr(feature = "ir_serde", derive(Serialize, Deserialize))] pub struct DistinctOptionsIR { /// Subset of columns that will be taken into account. pub subset: Option>, @@ -213,6 +214,13 @@ impl FunctionOptions { pub fn check_lengths(&self) -> bool { self.check_lengths.0 } + + pub fn is_elementwise(&self) -> bool { + self.collect_groups == ApplyOptions::ElementWise + && !self + .flags + .contains(FunctionFlags::CHANGES_LENGTH | FunctionFlags::RETURNS_SCALAR) + } } impl Default for FunctionOptions { diff --git a/crates/polars-python/src/cloud.rs b/crates/polars-python/src/cloud.rs index 5c8a7d01eafe..dacca675c551 100644 --- a/crates/polars-python/src/cloud.rs +++ b/crates/polars-python/src/cloud.rs @@ -5,9 +5,9 @@ use crate::error::PyPolarsErr; use crate::PyLazyFrame; #[pyfunction] -pub fn prepare_cloud_plan(lf: PyLazyFrame, uri: String, py: Python) -> PyResult { +pub fn prepare_cloud_plan(lf: PyLazyFrame, py: Python) -> PyResult { let plan = lf.ldf.logical_plan; - let bytes = polars::prelude::prepare_cloud_plan(plan, uri).map_err(PyPolarsErr::from)?; + let bytes = polars::prelude::prepare_cloud_plan(plan).map_err(PyPolarsErr::from)?; Ok(PyBytes::new_bound(py, &bytes).to_object(py)) } diff --git a/crates/polars-python/src/functions/lazy.rs b/crates/polars-python/src/functions/lazy.rs index aa098aee2cb0..51800ed9d4e1 100644 --- a/crates/polars-python/src/functions/lazy.rs +++ b/crates/polars-python/src/functions/lazy.rs @@ -437,7 +437,7 @@ pub fn lit(value: &Bound<'_, PyAny>, allow_object: bool) -> PyResult { Ok(dsl::lit(value.as_bytes()).into()) } else if matches!( value.get_type().qualname().unwrap().as_str(), - "date" | "datetime" | "Decimal" + "date" | "datetime" | "time" | "timedelta" | "Decimal" ) { let av = py_object_to_any_value(value, true)?; Ok(Expr::Literal(LiteralValue::try_from(av).unwrap()).into()) diff --git a/crates/polars-python/src/lazyframe/general.rs b/crates/polars-python/src/lazyframe/general.rs index 7ec2c392cb6e..cc79d1102abc 100644 --- a/crates/polars-python/src/lazyframe/general.rs +++ b/crates/polars-python/src/lazyframe/general.rs @@ -890,12 +890,12 @@ impl PyLazyFrame { strategy: Wrap, 
tolerance: Option>>, tolerance_str: Option, - coalesce: Option, + coalesce: bool, ) -> PyResult { - let coalesce = match coalesce { - None => JoinCoalesce::JoinSpecific, - Some(true) => JoinCoalesce::CoalesceColumns, - Some(false) => JoinCoalesce::KeepColumns, + let coalesce = if coalesce { + JoinCoalesce::CoalesceColumns + } else { + JoinCoalesce::KeepColumns }; let ldf = self.ldf.clone(); let other = other.ldf; @@ -1170,7 +1170,7 @@ impl PyLazyFrame { fn collect_schema(&mut self, py: Python) -> PyResult { let schema = py - .allow_threads(|| self.ldf.schema()) + .allow_threads(|| self.ldf.collect_schema()) .map_err(PyPolarsErr::from)?; let schema_dict = PyDict::new_bound(py); diff --git a/crates/polars-python/src/lazyframe/visit.rs b/crates/polars-python/src/lazyframe/visit.rs index 32585c4cc887..36d8e6e4b793 100644 --- a/crates/polars-python/src/lazyframe/visit.rs +++ b/crates/polars-python/src/lazyframe/visit.rs @@ -57,7 +57,7 @@ impl NodeTraverser { // Increment major on breaking changes to the IR (e.g. renaming // fields, reordering tuples), minor on backwards compatible // changes (e.g. exposing a new expression node). - const VERSION: Version = (1, 0); + const VERSION: Version = (1, 1); pub(crate) fn new(root: Node, lp_arena: Arena, expr_arena: Arena) -> Self { Self { diff --git a/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs b/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs index fe85d23b6fb7..d282e6d528e3 100644 --- a/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs +++ b/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs @@ -1,7 +1,11 @@ use polars::datatypes::TimeUnit; +use polars::series::ops::NullBehavior; use polars_core::prelude::{NonExistent, QuantileInterpolOptions}; use polars_core::series::IsSorted; use polars_ops::prelude::ClosedInterval; +use polars_ops::series::InterpolationMethod; +#[cfg(feature = "search_sorted")] +use polars_ops::series::SearchSortedSide; use polars_plan::dsl::function_expr::rolling::RollingFunction; use polars_plan::dsl::function_expr::rolling_by::RollingFunctionBy; use polars_plan::dsl::{BooleanFunction, StringFunction, TemporalFunction}; @@ -1054,21 +1058,31 @@ pub(crate) fn into_py(py: Python<'_>, expr: &AExpr) -> PyResult { }, FunctionExpr::Abs => ("abs",).to_object(py), #[cfg(feature = "hist")] - FunctionExpr::Hist { .. 
} => return Err(PyNotImplementedError::new_err("hist")), + FunctionExpr::Hist { + bin_count, + include_category, + include_breakpoint, + } => ("hist", bin_count, include_category, include_breakpoint).to_object(py), FunctionExpr::NullCount => ("null_count",).to_object(py), FunctionExpr::Pow(f) => match f { PowFunction::Generic => ("pow",).to_object(py), PowFunction::Sqrt => ("sqrt",).to_object(py), PowFunction::Cbrt => ("cbrt",).to_object(py), }, - FunctionExpr::Hash(_, _, _, _) => { - return Err(PyNotImplementedError::new_err("hash")) + FunctionExpr::Hash(seed, seed_1, seed_2, seed_3) => { + ("hash", seed, seed_1, seed_2, seed_3).to_object(py) }, FunctionExpr::ArgWhere => ("argwhere",).to_object(py), #[cfg(feature = "search_sorted")] - FunctionExpr::SearchSorted(_) => { - return Err(PyNotImplementedError::new_err("search sorted")) - }, + FunctionExpr::SearchSorted(side) => ( + "search_sorted", + match side { + SearchSortedSide::Any => "any", + SearchSortedSide::Left => "left", + SearchSortedSide::Right => "right", + }, + ) + .to_object(py), FunctionExpr::Range(_) => return Err(PyNotImplementedError::new_err("range")), #[cfg(feature = "trigonometry")] FunctionExpr::Trigonometry(trigfun) => { @@ -1147,17 +1161,13 @@ pub(crate) fn into_py(py: Python<'_>, expr: &AExpr) -> PyResult { return Err(PyNotImplementedError::new_err("rolling std by")) }, }, - FunctionExpr::ShiftAndFill => { - return Err(PyNotImplementedError::new_err("shift and fill")) - }, + FunctionExpr::ShiftAndFill => ("shift_and_fill",).to_object(py), FunctionExpr::Shift => ("shift",).to_object(py), FunctionExpr::DropNans => ("drop_nans",).to_object(py), FunctionExpr::DropNulls => ("drop_nulls",).to_object(py), FunctionExpr::Mode => ("mode",).to_object(py), - FunctionExpr::Skew(_) => return Err(PyNotImplementedError::new_err("skew")), - FunctionExpr::Kurtosis(_, _) => { - return Err(PyNotImplementedError::new_err("kurtosis")) - }, + FunctionExpr::Skew(bias) => ("skew", bias).to_object(py), + FunctionExpr::Kurtosis(fisher, bias) => ("kurtosis", fisher, bias).to_object(py), FunctionExpr::Reshape(_, _) => { return Err(PyNotImplementedError::new_err("reshape")) }, @@ -1168,11 +1178,8 @@ pub(crate) fn into_py(py: Python<'_>, expr: &AExpr) -> PyResult { options: _, seed: _, } => return Err(PyNotImplementedError::new_err("rank")), - FunctionExpr::Clip { - has_min: _, - has_max: _, - } => return Err(PyNotImplementedError::new_err("clip")), - FunctionExpr::AsStruct => return Err(PyNotImplementedError::new_err("as struct")), + FunctionExpr::Clip { has_min, has_max } => ("clip", has_min, has_max).to_object(py), + FunctionExpr::AsStruct => ("as_struct",).to_object(py), #[cfg(feature = "top_k")] FunctionExpr::TopK { descending } => ("top_k", descending).to_object(py), FunctionExpr::CumCount { reverse } => ("cum_count", reverse).to_object(py), @@ -1182,37 +1189,41 @@ pub(crate) fn into_py(py: Python<'_>, expr: &AExpr) -> PyResult { FunctionExpr::CumMax { reverse } => ("cum_max", reverse).to_object(py), FunctionExpr::Reverse => ("reverse",).to_object(py), FunctionExpr::ValueCounts { - sort: _, - parallel: _, - name: _, - normalize: _, - } => return Err(PyNotImplementedError::new_err("value counts")), + sort, + parallel, + name, + normalize, + } => ("value_counts", sort, parallel, name, normalize).to_object(py), FunctionExpr::UniqueCounts => ("unique_counts",).to_object(py), - FunctionExpr::ApproxNUnique => { - return Err(PyNotImplementedError::new_err("approx nunique")) - }, + FunctionExpr::ApproxNUnique => ("approx_n_unique",).to_object(py), 
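Aside (not part of the patch): the arms above all follow the same convention of lowering an IR function node to a ("name", args...) Python tuple via pyo3's `to_object`, which the Python-side node visitor then matches on. Below is a minimal, self-contained sketch of that encoding pattern, assuming the pyo3 0.21-style `to_object`/`extract` API used elsewhere in this diff; `Fill` and `encode` are made-up names, not the real `FunctionExpr` machinery.

use pyo3::prelude::*;

// `Fill` is a hypothetical stand-in enum, not the real polars FunctionExpr.
enum Fill {
    Backward { limit: Option<u32> },
    Forward { limit: Option<u32> },
}

// Mirror the ("name", args...) tuple convention used by `into_py` above.
fn encode(py: Python<'_>, f: &Fill) -> PyObject {
    match f {
        Fill::Backward { limit } => ("backward_fill", *limit).to_object(py),
        Fill::Forward { limit } => ("forward_fill", *limit).to_object(py),
    }
}

fn main() -> PyResult<()> {
    Python::with_gil(|py| {
        let obj = encode(py, &Fill::Backward { limit: Some(2) });
        // Round-trip back into Rust to show the tuple shape the Python visitor sees.
        let (name, limit): (String, Option<u32>) = obj.extract(py)?;
        assert_eq!((name.as_str(), limit), ("backward_fill", Some(2)));
        let _ = encode(py, &Fill::Forward { limit: None });
        Ok(())
    })
}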
FunctionExpr::Coalesce => ("coalesce",).to_object(py), - FunctionExpr::ShrinkType => { - return Err(PyNotImplementedError::new_err("shrink type")) - }, - FunctionExpr::Diff(_, _) => return Err(PyNotImplementedError::new_err("diff")), + FunctionExpr::ShrinkType => ("shrink_dtype",).to_object(py), + FunctionExpr::Diff(n, null_behaviour) => ( + "diff", + n, + match null_behaviour { + NullBehavior::Drop => "drop", + NullBehavior::Ignore => "ignore", + }, + ) + .to_object(py), #[cfg(feature = "pct_change")] - FunctionExpr::PctChange => { - return Err(PyNotImplementedError::new_err("pct change")) - }, - FunctionExpr::Interpolate(_) => { - return Err(PyNotImplementedError::new_err("interpolate")) - }, - FunctionExpr::InterpolateBy => { - return Err(PyNotImplementedError::new_err("interpolate_by")) + FunctionExpr::PctChange => ("pct_change",).to_object(py), + FunctionExpr::Interpolate(method) => ( + "interpolate", + match method { + InterpolationMethod::Linear => "linear", + InterpolationMethod::Nearest => "nearest", + }, + ) + .to_object(py), + FunctionExpr::InterpolateBy => ("interpolate_by",).to_object(py), + FunctionExpr::Entropy { base, normalize } => { + ("entropy", base, normalize).to_object(py) }, - FunctionExpr::Entropy { - base: _, - normalize: _, - } => return Err(PyNotImplementedError::new_err("entropy")), - FunctionExpr::Log { base: _ } => return Err(PyNotImplementedError::new_err("log")), - FunctionExpr::Log1p => return Err(PyNotImplementedError::new_err("log1p")), - FunctionExpr::Exp => return Err(PyNotImplementedError::new_err("exp")), + FunctionExpr::Log { base } => ("log", base).to_object(py), + FunctionExpr::Log1p => ("log1p",).to_object(py), + FunctionExpr::Exp => ("exp",).to_object(py), FunctionExpr::Unique(maintain_order) => ("unique", maintain_order).to_object(py), FunctionExpr::Round { decimals } => ("round", decimals).to_object(py), FunctionExpr::RoundSF { digits } => ("round_sig_figs", digits).to_object(py), @@ -1228,20 +1239,18 @@ pub(crate) fn into_py(py: Python<'_>, expr: &AExpr) -> PyResult { return Err(PyNotImplementedError::new_err("corr")) }, #[cfg(feature = "peaks")] - FunctionExpr::PeakMin => return Err(PyNotImplementedError::new_err("peak min")), + FunctionExpr::PeakMin => ("peak_min",).to_object(py), #[cfg(feature = "peaks")] - FunctionExpr::PeakMax => return Err(PyNotImplementedError::new_err("peak max")), + FunctionExpr::PeakMax => ("peak_max",).to_object(py), #[cfg(feature = "cutqcut")] FunctionExpr::Cut { .. } => return Err(PyNotImplementedError::new_err("cut")), #[cfg(feature = "cutqcut")] FunctionExpr::QCut { .. } => return Err(PyNotImplementedError::new_err("qcut")), #[cfg(feature = "rle")] - FunctionExpr::RLE => return Err(PyNotImplementedError::new_err("rle")), + FunctionExpr::RLE => ("rle",).to_object(py), #[cfg(feature = "rle")] - FunctionExpr::RLEID => return Err(PyNotImplementedError::new_err("rleid")), - FunctionExpr::ToPhysical => { - return Err(PyNotImplementedError::new_err("to physical")) - }, + FunctionExpr::RLEID => ("rle_id",).to_object(py), + FunctionExpr::ToPhysical => ("to_physical",).to_object(py), FunctionExpr::Random { .. } => { return Err(PyNotImplementedError::new_err("random")) }, @@ -1258,24 +1267,12 @@ pub(crate) fn into_py(py: Python<'_>, expr: &AExpr) -> PyResult { FunctionExpr::FfiPlugin { ..
} => { return Err(PyNotImplementedError::new_err("ffi plugin")) }, - FunctionExpr::BackwardFill { limit: _ } => { - return Err(PyNotImplementedError::new_err("backward fill")) - }, - FunctionExpr::ForwardFill { limit: _ } => { - return Err(PyNotImplementedError::new_err("forward fill")) - }, - FunctionExpr::SumHorizontal => { - return Err(PyNotImplementedError::new_err("sum horizontal")) - }, - FunctionExpr::MaxHorizontal => { - return Err(PyNotImplementedError::new_err("max horizontal")) - }, - FunctionExpr::MeanHorizontal => { - return Err(PyNotImplementedError::new_err("mean horizontal")) - }, - FunctionExpr::MinHorizontal => { - return Err(PyNotImplementedError::new_err("min horizontal")) - }, + FunctionExpr::BackwardFill { limit } => ("backward_fill", limit).to_object(py), + FunctionExpr::ForwardFill { limit } => ("forward_fill", limit).to_object(py), + FunctionExpr::SumHorizontal => ("sum_horizontal",).to_object(py), + FunctionExpr::MaxHorizontal => ("max_horizontal",).to_object(py), + FunctionExpr::MeanHorizontal => ("mean_horizontal",).to_object(py), + FunctionExpr::MinHorizontal => ("min_horizontal",).to_object(py), FunctionExpr::EwmMean { options: _ } => { return Err(PyNotImplementedError::new_err("ewm mean")) }, @@ -1285,23 +1282,20 @@ pub(crate) fn into_py(py: Python<'_>, expr: &AExpr) -> PyResult { FunctionExpr::EwmVar { options: _ } => { return Err(PyNotImplementedError::new_err("ewm var")) }, - FunctionExpr::Replace => return Err(PyNotImplementedError::new_err("replace")), + FunctionExpr::Replace => ("replace",).to_object(py), FunctionExpr::ReplaceStrict { return_dtype: _ } => { - return Err(PyNotImplementedError::new_err("replace_strict")) + // Can ignore the return dtype because it is encoded in the schema. + ("replace_strict",).to_object(py) }, - FunctionExpr::Negate => return Err(PyNotImplementedError::new_err("negate")), + FunctionExpr::Negate => ("negate",).to_object(py), FunctionExpr::FillNullWithStrategy(_) => { return Err(PyNotImplementedError::new_err("fill null with strategy")) }, FunctionExpr::GatherEvery { n, offset } => { ("gather_every", offset, n).to_object(py) }, - FunctionExpr::Reinterpret(_) => { - return Err(PyNotImplementedError::new_err("reinterpret")) - }, - FunctionExpr::ExtendConstant => { - return Err(PyNotImplementedError::new_err("extend constant")) - }, + FunctionExpr::Reinterpret(signed) => ("reinterpret", signed).to_object(py), + FunctionExpr::ExtendConstant => ("extend_constant",).to_object(py), FunctionExpr::Business(_) => { return Err(PyNotImplementedError::new_err("business")) }, diff --git a/crates/polars-python/src/lazygroupby.rs b/crates/polars-python/src/lazygroupby.rs index 255bb34917f9..52df635efb53 100644 --- a/crates/polars-python/src/lazygroupby.rs +++ b/crates/polars-python/src/lazygroupby.rs @@ -43,7 +43,7 @@ impl PyLazyGroupBy { let schema = match schema { Some(schema) => Arc::new(schema.0), None => LazyFrame::from(lgb.logical_plan.clone()) - .schema() + .collect_schema() .map_err(PyPolarsErr::from)?, }; diff --git a/crates/polars-python/src/series/general.rs b/crates/polars-python/src/series/general.rs index a3ea8a6fc676..63c1caeb71ee 100644 --- a/crates/polars-python/src/series/general.rs +++ b/crates/polars-python/src/series/general.rs @@ -335,7 +335,7 @@ impl PySeries { if let Some(output_type) = output_type { return Ok(Series::full_null(series.name(), series.len(), &output_type.0).into()); } - let msg = "The output type of the 'apply' function cannot be determined.\n\ + let msg = "The output type of the 'map_elements' 
function cannot be determined.\n\ The function was never called because 'skip_nulls=True' and all values are null.\n\ Consider setting 'skip_nulls=False' or setting the 'return_dtype'."; raise_err!(msg, ComputeError) diff --git a/crates/polars-sql/Cargo.toml b/crates/polars-sql/Cargo.toml index 0c8f883daf50..29febbfc149c 100644 --- a/crates/polars-sql/Cargo.toml +++ b/crates/polars-sql/Cargo.toml @@ -37,7 +37,7 @@ csv = ["polars-lazy/csv"] diagonal_concat = ["polars-lazy/diagonal_concat"] dtype-decimal = ["polars-lazy/dtype-decimal"] ipc = ["polars-lazy/ipc"] -json = ["polars-lazy/json", "polars-plan/extract_jsonpath"] +json = ["polars-lazy/json", "polars-plan/json", "polars-plan/extract_jsonpath"] list_eval = ["polars-lazy/list_eval"] parquet = ["polars-lazy/parquet"] semi_anti_join = ["polars-lazy/semi_anti_join"] diff --git a/crates/polars-sql/src/context.rs b/crates/polars-sql/src/context.rs index ab1b9a53997c..b131ae805339 100644 --- a/crates/polars-sql/src/context.rs +++ b/crates/polars-sql/src/context.rs @@ -382,7 +382,7 @@ impl SQLContext { let lf_schema = self.get_frame_schema(&mut lf)?; let lf_cols: Vec<_> = lf_schema.iter_names().map(|nm| col(nm)).collect(); let joined_tbl = match quantifier { - SetQuantifier::ByName | SetQuantifier::AllByName => join.on(lf_cols).finish(), + SetQuantifier::ByName => join.on(lf_cols).finish(), SetQuantifier::Distinct | SetQuantifier::None => { let rf_schema = self.get_frame_schema(&mut rf)?; let rf_cols: Vec<_> = rf_schema.iter_names().map(|nm| col(nm)).collect(); diff --git a/crates/polars-stream/Cargo.toml b/crates/polars-stream/Cargo.toml index a8741189f7dd..e2a7d0c45649 100644 --- a/crates/polars-stream/Cargo.toml +++ b/crates/polars-stream/Cargo.toml @@ -12,9 +12,11 @@ description = "Private crate for the streaming execution engine for the Polars D atomic-waker = { workspace = true } crossbeam-deque = { workspace = true } crossbeam-utils = { workspace = true } +futures = { workspace = true } +memmap = { workspace = true } parking_lot = { workspace = true } pin-project-lite = { workspace = true } -polars-io = { workspace = true, features = ["async"] } +polars-io = { workspace = true, features = ["async", "cloud", "aws"] } polars-utils = { workspace = true } rand = { workspace = true } rayon = { workspace = true } @@ -25,8 +27,9 @@ tokio = { workspace = true } polars-core = { workspace = true } polars-error = { workspace = true } polars-expr = { workspace = true } -polars-mem-engine = { workspace = true } -polars-plan = { workspace = true } +polars-mem-engine = { workspace = true, features = ["parquet"] } +polars-parquet = { workspace = true } +polars-plan = { workspace = true, features = ["parquet"] } [build-dependencies] version_check = { workspace = true } diff --git a/crates/polars-stream/src/async_executor/mod.rs b/crates/polars-stream/src/async_executor/mod.rs index ea239628990f..dec560845b09 100644 --- a/crates/polars-stream/src/async_executor/mod.rs +++ b/crates/polars-stream/src/async_executor/mod.rs @@ -15,7 +15,7 @@ use parking_lot::Mutex; use rand::rngs::SmallRng; use rand::{Rng, SeedableRng}; use slotmap::SlotMap; -pub use task::JoinHandle; +pub use task::{AbortOnDropHandle, JoinHandle}; use task::{CancelHandle, Runnable}; static NUM_EXECUTOR_THREADS: AtomicUsize = AtomicUsize::new(0); @@ -345,7 +345,6 @@ where } } -#[allow(unused)] pub fn spawn(priority: TaskPriority, fut: F) -> JoinHandle where ::Output: Send + 'static, diff --git a/crates/polars-stream/src/async_executor/task.rs b/crates/polars-stream/src/async_executor/task.rs 
index b1f0dfcfbe69..9991377eb718 100644 --- a/crates/polars-stream/src/async_executor/task.rs +++ b/crates/polars-stream/src/async_executor/task.rs @@ -278,6 +278,10 @@ impl Runnable { pub struct JoinHandle(Option>>); pub struct CancelHandle(Weak); +pub struct AbortOnDropHandle { + join_handle: JoinHandle, + cancel_handle: CancelHandle, +} impl JoinHandle { pub fn cancel_handle(&self) -> CancelHandle { @@ -305,13 +309,37 @@ impl Future for JoinHandle { } impl CancelHandle { - pub fn cancel(self) { + pub fn cancel(&self) { if let Some(t) = self.0.upgrade() { t.cancel(); } } } +impl AbortOnDropHandle { + pub fn new(join_handle: JoinHandle) -> Self { + let cancel_handle = join_handle.cancel_handle(); + Self { + join_handle, + cancel_handle, + } + } +} + +impl Future for AbortOnDropHandle { + type Output = T; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + Pin::new(&mut self.join_handle).poll(cx) + } +} + +impl Drop for AbortOnDropHandle { + fn drop(&mut self) { + self.cancel_handle.cancel(); + } +} + pub fn spawn(future: F, schedule: S, metadata: M) -> (Runnable, JoinHandle) where F: Future + Send + 'static, diff --git a/crates/polars-stream/src/async_primitives/distributor_channel.rs b/crates/polars-stream/src/async_primitives/distributor_channel.rs index 5bdeb7e56866..21af7b53d7d1 100644 --- a/crates/polars-stream/src/async_primitives/distributor_channel.rs +++ b/crates/polars-stream/src/async_primitives/distributor_channel.rs @@ -198,6 +198,8 @@ impl Sender { } impl Receiver { + /// Note: This intentionally takes `&mut` to ensure it is only accessed in a single-threaded + /// manner. pub async fn recv(&mut self) -> Result { loop { // Fast-path. diff --git a/crates/polars-stream/src/execute.rs b/crates/polars-stream/src/execute.rs index 5f3bdebf7d36..d17bc89bd6ad 100644 --- a/crates/polars-stream/src/execute.rs +++ b/crates/polars-stream/src/execute.rs @@ -205,10 +205,11 @@ fn run_subgraph( for input in &node.inputs { let sender = graph.pipes[*input].sender; if let Some(count) = num_send_ports_not_yet_ready.get_mut(sender) { - assert!(*count > 0); - *count -= 1; - if *count == 0 { - ready.push(sender); + if *count > 0 { + *count -= 1; + if *count == 0 { + ready.push(sender); + } } } } @@ -247,7 +248,7 @@ pub fn execute_graph( if polars_core::config::verbose() { eprintln!("polars-stream: updating graph state"); } - graph.update_all_states(); + graph.update_all_states()?; let (nodes, pipes) = find_runnable_subgraph(graph); if polars_core::config::verbose() { for node in &nodes { diff --git a/crates/polars-stream/src/expression.rs b/crates/polars-stream/src/expression.rs index a6e41728d111..3c1b9445997c 100644 --- a/crates/polars-stream/src/expression.rs +++ b/crates/polars-stream/src/expression.rs @@ -6,7 +6,7 @@ use polars_error::PolarsResult; use polars_expr::prelude::{ExecutionState, PhysicalExpr}; #[derive(Clone)] -pub(crate) struct StreamExpr { +pub struct StreamExpr { inner: Arc, // Whether the expression can be re-entering the engine (e.g. 
a function use the lazy api // within that function) @@ -14,18 +14,14 @@ pub(crate) struct StreamExpr { } impl StreamExpr { - pub(crate) fn new(phys_expr: Arc, reentrant: bool) -> Self { + pub fn new(phys_expr: Arc, reentrant: bool) -> Self { Self { inner: phys_expr, reentrant, } } - pub(crate) async fn evaluate( - &self, - df: &DataFrame, - state: &ExecutionState, - ) -> PolarsResult { + pub async fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { if self.reentrant { let state = state.clone(); let phys_expr = self.inner.clone(); diff --git a/crates/polars-stream/src/graph.rs b/crates/polars-stream/src/graph.rs index 055d8df4a5ae..572c1f1c306d 100644 --- a/crates/polars-stream/src/graph.rs +++ b/crates/polars-stream/src/graph.rs @@ -1,3 +1,4 @@ +use polars_error::PolarsResult; use slotmap::{SecondaryMap, SlotMap}; use crate::nodes::ComputeNode; @@ -64,11 +65,13 @@ impl Graph { } /// Updates all the nodes' states until a fixed point is reached. - pub fn update_all_states(&mut self) { + pub fn update_all_states(&mut self) -> PolarsResult<()> { let mut to_update: Vec<_> = self.nodes.keys().collect(); let mut scheduled_for_update: SecondaryMap = self.nodes.keys().map(|k| (k, ())).collect(); + let verbose = std::env::var("POLARS_VERBOSE_STATE_UPDATE").as_deref() == Ok("1"); + let mut recv_state = Vec::new(); let mut send_state = Vec::new(); while let Some(node_key) = to_update.pop() { @@ -82,15 +85,25 @@ impl Graph { send_state.extend(node.outputs.iter().map(|o| self.pipes[*o].recv_state)); // Compute the new state of this node given its environment. - // eprintln!("updating {}, before: {recv_state:?} {send_state:?}", node.compute.name()); - node.compute.update_state(&mut recv_state, &mut send_state); - // eprintln!("updating {}, after: {recv_state:?} {send_state:?}", node.compute.name()); + if verbose { + eprintln!( + "updating {}, before: {recv_state:?} {send_state:?}", + node.compute.name() + ); + } + node.compute + .update_state(&mut recv_state, &mut send_state)?; + if verbose { + eprintln!( + "updating {}, after: {recv_state:?} {send_state:?}", + node.compute.name() + ); + } // Propagate information. 
for (input, state) in node.inputs.iter().zip(recv_state.iter()) { let pipe = &mut self.pipes[*input]; if pipe.recv_state != *state { - // eprintln!("transitioning input pipe from {:?} to {state:?}", pipe.recv_state); assert!(pipe.recv_state != PortState::Done, "implementation error: state transition from Done to Blocked/Ready attempted"); pipe.recv_state = *state; if scheduled_for_update.insert(pipe.sender, ()).is_none() { @@ -102,7 +115,6 @@ impl Graph { for (output, state) in node.outputs.iter().zip(send_state.iter()) { let pipe = &mut self.pipes[*output]; if pipe.send_state != *state { - // eprintln!("transitioning output pipe from {:?} to {state:?}", pipe.send_state); assert!(pipe.send_state != PortState::Done, "implementation error: state transition from Done to Blocked/Ready attempted"); pipe.send_state = *state; if scheduled_for_update.insert(pipe.receiver, ()).is_none() { @@ -111,6 +123,7 @@ impl Graph { } } } + Ok(()) } } diff --git a/crates/polars-stream/src/nodes/filter.rs b/crates/polars-stream/src/nodes/filter.rs index 8a19b1a27986..9f0b0301ef91 100644 --- a/crates/polars-stream/src/nodes/filter.rs +++ b/crates/polars-stream/src/nodes/filter.rs @@ -18,9 +18,10 @@ impl ComputeNode for FilterNode { "filter" } - fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) { + fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) -> PolarsResult<()> { assert!(recv.len() == 1 && send.len() == 1); recv.swap_with_slice(send); + Ok(()) } fn spawn<'env, 's>( diff --git a/crates/polars-stream/src/nodes/in_memory_map.rs b/crates/polars-stream/src/nodes/in_memory_map.rs index 09769172c430..3a8bff496a18 100644 --- a/crates/polars-stream/src/nodes/in_memory_map.rs +++ b/crates/polars-stream/src/nodes/in_memory_map.rs @@ -39,7 +39,7 @@ impl ComputeNode for InMemoryMapNode { } } - fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) { + fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) -> PolarsResult<()> { assert!(recv.len() == 1 && send.len() == 1); // If the output doesn't want any more data, transition to being done. @@ -55,9 +55,8 @@ impl ComputeNode for InMemoryMapNode { } = self { if recv[0] == PortState::Done { - let df = sink_node.get_output().unwrap(); - let mut source_node = - InMemorySourceNode::new(Arc::new(map.call_udf(df.unwrap()).unwrap())); + let df = sink_node.get_output()?; + let mut source_node = InMemorySourceNode::new(Arc::new(map.call_udf(df.unwrap())?)); source_node.initialize(*num_pipelines); *self = Self::Source(source_node); } @@ -65,18 +64,19 @@ impl ComputeNode for InMemoryMapNode { match self { Self::Sink { sink_node, .. 
} => { - sink_node.update_state(recv, &mut []); + sink_node.update_state(recv, &mut [])?; send[0] = PortState::Blocked; }, Self::Source(source_node) => { recv[0] = PortState::Done; - source_node.update_state(&mut [], send); + source_node.update_state(&mut [], send)?; }, Self::Done => { recv[0] = PortState::Done; send[0] = PortState::Done; }, } + Ok(()) } fn is_memory_intensive_pipeline_blocker(&self) -> bool { diff --git a/crates/polars-stream/src/nodes/in_memory_sink.rs b/crates/polars-stream/src/nodes/in_memory_sink.rs index 0a4750d7b8b9..afd6ccfd95cc 100644 --- a/crates/polars-stream/src/nodes/in_memory_sink.rs +++ b/crates/polars-stream/src/nodes/in_memory_sink.rs @@ -26,7 +26,7 @@ impl ComputeNode for InMemorySinkNode { "in_memory_sink" } - fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) { + fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) -> PolarsResult<()> { assert!(send.is_empty()); assert!(recv.len() == 1); @@ -35,6 +35,7 @@ impl ComputeNode for InMemorySinkNode { if recv[0] != PortState::Done { recv[0] = PortState::Ready; } + Ok(()) } fn is_memory_intensive_pipeline_blocker(&self) -> bool { diff --git a/crates/polars-stream/src/nodes/in_memory_source.rs b/crates/polars-stream/src/nodes/in_memory_source.rs index 826f9e5e5c83..45630eb7aab0 100644 --- a/crates/polars-stream/src/nodes/in_memory_source.rs +++ b/crates/polars-stream/src/nodes/in_memory_source.rs @@ -34,7 +34,7 @@ impl ComputeNode for InMemorySourceNode { self.seq = AtomicU64::new(0); } - fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) { + fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) -> PolarsResult<()> { assert!(recv.is_empty()); assert!(send.len() == 1); @@ -52,6 +52,7 @@ impl ComputeNode for InMemorySourceNode { } else { send[0] = PortState::Ready; } + Ok(()) } fn spawn<'env, 's>( diff --git a/crates/polars-stream/src/nodes/map.rs b/crates/polars-stream/src/nodes/map.rs index 44587193f23d..007dfa921672 100644 --- a/crates/polars-stream/src/nodes/map.rs +++ b/crates/polars-stream/src/nodes/map.rs @@ -20,9 +20,10 @@ impl ComputeNode for MapNode { "map" } - fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) { + fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) -> PolarsResult<()> { assert!(recv.len() == 1 && send.len() == 1); recv.swap_with_slice(send); + Ok(()) } fn spawn<'env, 's>( diff --git a/crates/polars-stream/src/nodes/mod.rs b/crates/polars-stream/src/nodes/mod.rs index 839646a488b0..4c71380e0ad4 100644 --- a/crates/polars-stream/src/nodes/mod.rs +++ b/crates/polars-stream/src/nodes/mod.rs @@ -5,6 +5,7 @@ pub mod in_memory_source; pub mod map; pub mod multiplexer; pub mod ordered_union; +pub mod parquet_source; pub mod reduce; pub mod select; pub mod simple_projection; @@ -45,7 +46,7 @@ pub trait ComputeNode: Send { /// Similarly, for each output pipe `send` will contain the respective /// state of the input port that pipe is connected to when called, and you /// must update it to contain the desired state of your output port. - fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]); + fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) -> PolarsResult<()>; /// If this node (in its current state) is a pipeline blocker, and whether /// this is memory intensive or not. 
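Aside (not part of the patch): the `update_state` signature change above, returning `PolarsResult<()>` instead of `()`, is what lets nodes such as `InMemoryMapNode` surface UDF errors rather than unwrapping. A minimal, self-contained sketch of the port-state handshake the trait doc describes follows; the names are simplified and this is not the actual polars-stream trait.

use polars_error::PolarsResult;

// Simplified port states for illustration only.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum PortState {
    Blocked,
    Ready,
    Done,
}

trait Node {
    // `recv` holds the state of each input port, `send` the demand of each output
    // port; the node overwrites both with the states it wants, and may now fail.
    fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) -> PolarsResult<()>;
}

// A pass-through node (cf. FilterNode/MapNode above) mirrors downstream demand
// back to its input and upstream readiness forward to its output.
struct PassThrough;

impl Node for PassThrough {
    fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) -> PolarsResult<()> {
        assert!(recv.len() == 1 && send.len() == 1);
        recv.swap_with_slice(send);
        Ok(())
    }
}

fn main() -> PolarsResult<()> {
    let mut node = PassThrough;
    let mut recv = [PortState::Ready];   // upstream has data ready
    let mut send = [PortState::Blocked]; // downstream is currently blocked
    node.update_state(&mut recv, &mut send)?;
    assert_eq!((recv[0], send[0]), (PortState::Blocked, PortState::Ready));
    Ok(())
}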
diff --git a/crates/polars-stream/src/nodes/multiplexer.rs b/crates/polars-stream/src/nodes/multiplexer.rs index a0238b94da2e..65f2e752d28d 100644 --- a/crates/polars-stream/src/nodes/multiplexer.rs +++ b/crates/polars-stream/src/nodes/multiplexer.rs @@ -34,7 +34,7 @@ impl ComputeNode for MultiplexerNode { "multiplexer" } - fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) { + fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) -> PolarsResult<()> { assert!(recv.len() == 1 && !send.is_empty()); // Initialize buffered streams, and mark those for which the receiver @@ -60,14 +60,24 @@ impl ComputeNode for MultiplexerNode { for s in send { *s = PortState::Done; } - return; + return Ok(()); } let all_blocked = send.iter().all(|p| *p == PortState::Blocked); // Pass along the input state to the output. - for s in send { - *s = recv[0]; + for (i, s) in send.iter_mut().enumerate() { + let buffer_empty = match &self.buffers[i] { + BufferedStream::Open(v) => v.is_empty(), + BufferedStream::Closed => true, + }; + *s = if buffer_empty && recv[0] == PortState::Done { + PortState::Done + } else if !buffer_empty || recv[0] == PortState::Ready { + PortState::Ready + } else { + PortState::Blocked + }; } // We say we are ready to receive unless all outputs are blocked. @@ -76,6 +86,7 @@ impl ComputeNode for MultiplexerNode { } else { PortState::Ready }; + Ok(()) } fn spawn<'env, 's>( diff --git a/crates/polars-stream/src/nodes/ordered_union.rs b/crates/polars-stream/src/nodes/ordered_union.rs index f38c306505b4..3c72d9cc6e15 100644 --- a/crates/polars-stream/src/nodes/ordered_union.rs +++ b/crates/polars-stream/src/nodes/ordered_union.rs @@ -23,7 +23,7 @@ impl ComputeNode for OrderedUnionNode { "ordered_union" } - fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) { + fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) -> PolarsResult<()> { assert!(self.cur_input_idx <= recv.len() && send.len() == 1); // Skip inputs that are done. @@ -46,6 +46,7 @@ impl ComputeNode for OrderedUnionNode { // Set the morsel offset one higher than any sent so far. 
self.morsel_offset = self.max_morsel_seq_sent.successor(); + Ok(()) } fn spawn<'env, 's>( diff --git a/crates/polars-stream/src/nodes/parquet_source.rs b/crates/polars-stream/src/nodes/parquet_source.rs new file mode 100644 index 000000000000..16184645da74 --- /dev/null +++ b/crates/polars-stream/src/nodes/parquet_source.rs @@ -0,0 +1,1920 @@ +use std::future::Future; +use std::path::PathBuf; +use std::sync::atomic::AtomicBool; +use std::sync::Arc; + +use futures::stream::FuturesUnordered; +use futures::StreamExt; +use polars_core::config; +use polars_core::frame::DataFrame; +use polars_core::prelude::{ + ArrowSchema, ChunkFull, DataType, IdxCa, InitHashMaps, PlHashMap, StringChunked, +}; +use polars_core::schema::IndexOfSchema; +use polars_core::series::{IntoSeries, IsSorted, Series}; +use polars_core::utils::operation_exceeded_idxsize_msg; +use polars_error::{polars_bail, polars_err, PolarsResult}; +use polars_expr::prelude::PhysicalExpr; +use polars_io::cloud::CloudOptions; +use polars_io::predicates::PhysicalIoExpr; +use polars_io::prelude::{FileMetaData, ParquetOptions}; +use polars_io::utils::byte_source::{ + ByteSource, DynByteSource, DynByteSourceBuilder, MemSliceByteSource, +}; +use polars_io::utils::slice::SplitSlicePosition; +use polars_io::{is_cloud_url, RowIndex}; +use polars_parquet::read::RowGroupMetaData; +use polars_plan::plans::hive::HivePartitions; +use polars_plan::plans::FileInfo; +use polars_plan::prelude::FileScanOptions; +use polars_utils::aliases::PlHashSet; +use polars_utils::mmap::MemSlice; +use polars_utils::slice::GetSaferUnchecked; +use polars_utils::IdxSize; + +use super::{MorselSeq, TaskPriority}; +use crate::async_executor::{self}; +use crate::async_primitives::connector::connector; +use crate::async_primitives::wait_group::{WaitGroup, WaitToken}; +use crate::morsel::get_ideal_morsel_size; +use crate::utils::notify_channel::{notify_channel, NotifyReceiver}; +use crate::utils::task_handles_ext; + +type AsyncTaskData = Option<( + Vec>, + async_executor::AbortOnDropHandle>, +)>; + +#[allow(clippy::type_complexity)] +pub struct ParquetSourceNode { + paths: Arc>, + file_info: FileInfo, + hive_parts: Option>>, + predicate: Option>, + options: ParquetOptions, + cloud_options: Option, + file_options: FileScanOptions, + // Run-time vars + config: Config, + verbose: bool, + physical_predicate: Option>, + projected_arrow_fields: Arc<[polars_core::prelude::ArrowField]>, + byte_source_builder: DynByteSourceBuilder, + memory_prefetch_func: fn(&[u8]) -> (), + // This permit blocks execution until the first morsel is requested. + morsel_stream_starter: Option>, + // This is behind a Mutex so that we can call `shutdown()` asynchronously. 
+ async_task_data: Arc>, + row_group_decoder: Option>, + is_finished: Arc, +} + +#[allow(clippy::too_many_arguments)] +impl ParquetSourceNode { + pub fn new( + paths: Arc>, + file_info: FileInfo, + hive_parts: Option>>, + predicate: Option>, + options: ParquetOptions, + cloud_options: Option, + file_options: FileScanOptions, + ) -> Self { + let verbose = config::verbose(); + + let byte_source_builder = + if is_cloud_url(paths[0].to_str().unwrap()) || config::force_async() { + DynByteSourceBuilder::ObjectStore + } else { + DynByteSourceBuilder::Mmap + }; + let memory_prefetch_func = get_memory_prefetch_func(verbose); + + Self { + paths, + file_info, + hive_parts, + predicate, + options, + cloud_options, + file_options, + + config: Config { + // Initialized later + num_pipelines: 0, + metadata_prefetch_size: 0, + metadata_decode_ahead_size: 0, + row_group_prefetch_size: 0, + }, + verbose, + physical_predicate: None, + projected_arrow_fields: Arc::new([]), + byte_source_builder, + memory_prefetch_func, + + morsel_stream_starter: None, + async_task_data: Arc::new(tokio::sync::Mutex::new(None)), + row_group_decoder: None, + is_finished: Arc::new(AtomicBool::new(false)), + } + } +} + +mod compute_node_impl { + + use std::sync::Arc; + + use polars_expr::prelude::phys_expr_to_io_expr; + + use super::super::compute_node_prelude::*; + use super::{Config, ParquetSourceNode}; + use crate::morsel::SourceToken; + + impl ComputeNode for ParquetSourceNode { + fn name(&self) -> &str { + "parquet_source" + } + + fn initialize(&mut self, num_pipelines: usize) { + self.config = { + let metadata_prefetch_size = polars_core::config::get_file_prefetch_size(); + // Limit metadata decode to the number of threads. + let metadata_decode_ahead_size = + (metadata_prefetch_size / 2).min(1 + num_pipelines).max(1); + let row_group_prefetch_size = polars_core::config::get_rg_prefetch_size(); + + Config { + num_pipelines, + metadata_prefetch_size, + metadata_decode_ahead_size, + row_group_prefetch_size, + } + }; + + if self.verbose { + eprintln!("[ParquetSource]: {:?}", &self.config); + } + + self.init_projected_arrow_fields(); + self.physical_predicate = self.predicate.clone().map(phys_expr_to_io_expr); + + let (raw_morsel_receivers, morsel_stream_task_handle) = self.init_raw_morsel_stream(); + + self.async_task_data + .try_lock() + .unwrap() + .replace((raw_morsel_receivers, morsel_stream_task_handle)); + + let row_group_decoder = self.init_row_group_decoder(); + self.row_group_decoder = Some(Arc::new(row_group_decoder)); + } + + fn update_state( + &mut self, + recv: &mut [PortState], + send: &mut [PortState], + ) -> PolarsResult<()> { + use std::sync::atomic::Ordering; + + assert!(recv.is_empty()); + assert_eq!(send.len(), 1); + + if self.is_finished.load(Ordering::Relaxed) { + send[0] = PortState::Done; + assert!( + self.async_task_data.try_lock().unwrap().is_none(), + "should have already been shut down" + ); + } else if send[0] == PortState::Done { + { + // Early shutdown - our port state was set to `Done` by the downstream nodes. 
+ self.shutdown_in_background(); + }; + self.is_finished.store(true, Ordering::Relaxed); + } else { + send[0] = PortState::Ready + } + + Ok(()) + } + + fn spawn<'env, 's>( + &'env mut self, + scope: &'s TaskScope<'s, 'env>, + recv: &mut [Option>], + send: &mut [Option>], + _state: &'s ExecutionState, + join_handles: &mut Vec>>, + ) { + use std::sync::atomic::Ordering; + + assert!(recv.is_empty()); + assert_eq!(send.len(), 1); + assert!(!self.is_finished.load(Ordering::Relaxed)); + + let morsel_senders = send[0].take().unwrap().parallel(); + + let mut async_task_data_guard = self.async_task_data.try_lock().unwrap(); + let (raw_morsel_receivers, _) = async_task_data_guard.as_mut().unwrap(); + + assert_eq!(raw_morsel_receivers.len(), morsel_senders.len()); + + if let Some(v) = self.morsel_stream_starter.take() { + v.send(()).unwrap(); + } + let is_finished = self.is_finished.clone(); + + let task_handles = raw_morsel_receivers + .drain(..) + .zip(morsel_senders) + .map(|(mut raw_morsel_rx, mut morsel_tx)| { + let is_finished = is_finished.clone(); + + scope.spawn_task(TaskPriority::Low, async move { + let source_token = SourceToken::new(); + loop { + let Ok((df, morsel_seq, wait_token)) = raw_morsel_rx.recv().await + else { + is_finished.store(true, Ordering::Relaxed); + break; + }; + + let mut morsel = Morsel::new(df, morsel_seq, source_token.clone()); + morsel.set_consume_token(wait_token); + + if morsel_tx.send(morsel).await.is_err() { + break; + } + + if source_token.stop_requested() { + break; + } + } + + raw_morsel_rx + }) + }) + .collect::>(); + + drop(async_task_data_guard); + + let async_task_data = self.async_task_data.clone(); + + join_handles.push(scope.spawn_task(TaskPriority::Low, async move { + { + let mut async_task_data_guard = async_task_data.try_lock().unwrap(); + let (raw_morsel_receivers, _) = async_task_data_guard.as_mut().unwrap(); + + for handle in task_handles { + raw_morsel_receivers.push(handle.await); + } + } + + if self.is_finished.load(Ordering::Relaxed) { + self.shutdown().await?; + } + + Ok(()) + })) + } + } +} + +impl ParquetSourceNode { + /// # Panics + /// Panics if called more than once. + async fn shutdown_impl( + async_task_data: Arc>, + verbose: bool, + ) -> PolarsResult<()> { + if verbose { + eprintln!("[ParquetSource]: Shutting down"); + } + + let (mut raw_morsel_receivers, morsel_stream_task_handle) = + async_task_data.try_lock().unwrap().take().unwrap(); + + raw_morsel_receivers.clear(); + // Join on the producer handle to catch errors/panics. + // Safety + // * We dropped the receivers on the line above + // * This function is only called once. + morsel_stream_task_handle.await + } + + fn shutdown(&self) -> impl Future> { + if self.verbose { + eprintln!("[ParquetSource]: Shutdown via `shutdown()`"); + } + Self::shutdown_impl(self.async_task_data.clone(), self.verbose) + } + + /// Spawns a task to shut down the source node to avoid blocking the current thread. This is + /// usually called when data is no longer needed from the source node, as such it does not + /// propagate any (non-critical) errors. If on the other hand the source node does not provide + /// more data when requested, then it is more suitable to call [`Self::shutdown`], as it returns + /// a result that can be used to distinguish between whether the data stream stopped due to an + /// error or EOF. 
+ fn shutdown_in_background(&self) { + if self.verbose { + eprintln!("[ParquetSource]: Shutdown via `shutdown_in_background()`"); + } + let async_task_data = self.async_task_data.clone(); + polars_io::pl_async::get_runtime() + .spawn(Self::shutdown_impl(async_task_data, self.verbose)); + } + + /// Constructs the task that provides a morsel stream. + #[allow(clippy::type_complexity)] + fn init_raw_morsel_stream( + &mut self, + ) -> ( + Vec>, + async_executor::AbortOnDropHandle>, + ) { + let verbose = self.verbose; + + let use_statistics = self.options.use_statistics; + + let (mut raw_morsel_senders, raw_morsel_receivers): (Vec<_>, Vec<_>) = + (0..self.config.num_pipelines).map(|_| connector()).unzip(); + + if let Some((_, 0)) = self.file_options.slice { + return ( + raw_morsel_receivers, + async_executor::AbortOnDropHandle::new(async_executor::spawn( + TaskPriority::Low, + std::future::ready(Ok(())), + )), + ); + } + + let reader_schema = self + .file_info + .reader_schema + .as_ref() + .unwrap() + .as_ref() + .unwrap_left() + .clone(); + + let (normalized_slice_oneshot_rx, metadata_rx, metadata_task_handle) = + self.init_metadata_fetcher(); + + let num_pipelines = self.config.num_pipelines; + let row_group_prefetch_size = self.config.row_group_prefetch_size; + let projection = self.file_options.with_columns.clone(); + assert_eq!(self.physical_predicate.is_some(), self.predicate.is_some()); + let predicate = self.physical_predicate.clone(); + let memory_prefetch_func = self.memory_prefetch_func; + let (start_tx, start_rx) = tokio::sync::oneshot::channel(); + self.morsel_stream_starter = Some(start_tx); + + let mut row_group_data_fetcher = RowGroupDataFetcher { + metadata_rx, + use_statistics, + verbose, + reader_schema, + projection, + predicate, + slice_range: None, // Initialized later + memory_prefetch_func, + current_path_index: 0, + current_byte_source: Default::default(), + current_row_groups: Default::default(), + current_row_group_idx: 0, + current_max_row_group_height: 0, + current_row_offset: 0, + current_shared_file_state: Default::default(), + }; + + let row_group_decoder = self.init_row_group_decoder(); + let row_group_decoder = Arc::new(row_group_decoder); + + // Processes row group metadata and spawns I/O tasks to fetch row group data. This is + // currently spawned onto the CPU runtime as it does not directly make any async I/O calls, + // but instead it potentially performs predicate/slice evaluation on metadata. If we observe + // that under heavy CPU load scenarios the I/O throughput drops due to this task not being + // scheduled we can change it to be a high priority task. + let morsel_stream_task_handle = async_executor::spawn(TaskPriority::Low, async move { + if start_rx.await.is_err() { + drop(row_group_data_fetcher); + return metadata_task_handle.await.unwrap(); + } + + if verbose { + eprintln!("[ParquetSource]: Starting row group data fetch") + } + + // We must `recv()` from the `NotifyReceiver` before awaiting on the + // `normalized_slice_oneshot_rx`, as in the negative offset case the slice resolution + // only runs after the first notify. + if !row_group_data_fetcher.init_next_file_state().await { + drop(row_group_data_fetcher); + return metadata_task_handle.await.unwrap(); + }; + + let slice_range = { + let Ok(slice) = normalized_slice_oneshot_rx.await else { + // If we are here then the producer probably errored. 
+ drop(row_group_data_fetcher); + return metadata_task_handle.await.unwrap(); + }; + + slice.map(|(offset, len)| offset..offset + len) + }; + + row_group_data_fetcher.slice_range = slice_range; + + // Pins a wait group to a channel index. + struct IndexedWaitGroup { + index: usize, + wait_group: WaitGroup, + } + + impl IndexedWaitGroup { + async fn wait(self) -> Self { + self.wait_group.wait().await; + self + } + } + + // Ensure proper backpressure by only polling the buffered iterator when a wait group + // is free. + let mut wait_groups = (0..num_pipelines) + .map(|index| { + let wait_group = WaitGroup::default(); + { + let _prime_this_wait_group = wait_group.token(); + } + IndexedWaitGroup { + index, + wait_group: WaitGroup::default(), + } + .wait() + }) + .collect::>(); + + let mut df_stream = row_group_data_fetcher + .into_stream() + .map(|x| async { + match x { + Ok(handle) => handle.await, + Err(e) => Err(e), + } + }) + .buffered(row_group_prefetch_size) + .map(|x| async { + let row_group_decoder = row_group_decoder.clone(); + + match x { + Ok(row_group_data) => { + async_executor::spawn(TaskPriority::Low, async move { + row_group_decoder.row_group_data_to_df(row_group_data).await + }) + .await + }, + Err(e) => Err(e), + } + }) + .buffered( + // Because we are using an ordered buffer, we may suffer from head-of-line blocking, + // so we add a small amount of buffer. + num_pipelines + 4, + ); + + let morsel_seq_ref = &mut MorselSeq::default(); + let mut dfs = vec![].into_iter(); + + 'main: loop { + let Some(mut indexed_wait_group) = wait_groups.next().await else { + break; + }; + + if dfs.len() == 0 { + let Some(v) = df_stream.next().await else { + break; + }; + + let v = v?; + assert!(!v.is_empty()); + + dfs = v.into_iter(); + } + + let mut df = dfs.next().unwrap(); + let morsel_seq = *morsel_seq_ref; + *morsel_seq_ref = morsel_seq.successor(); + + loop { + use crate::async_primitives::connector::SendError; + + let channel_index = indexed_wait_group.index; + let wait_token = indexed_wait_group.wait_group.token(); + + match raw_morsel_senders[channel_index].try_send((df, morsel_seq, wait_token)) { + Ok(_) => { + wait_groups.push(indexed_wait_group.wait()); + break; + }, + Err(SendError::Closed(v)) => { + // The port assigned to this wait group has been closed, so we will not + // add it back to the list of wait groups, and we will try to send this + // across another port. + df = v.0 + }, + Err(SendError::Full(_)) => unreachable!(), + } + + let Some(v) = wait_groups.next().await else { + // All ports have closed + break 'main; + }; + + indexed_wait_group = v; + } + } + + // Join on the producer handle to catch errors/panics. + drop(df_stream); + metadata_task_handle.await.unwrap() + }); + + let morsel_stream_task_handle = + async_executor::AbortOnDropHandle::new(morsel_stream_task_handle); + + (raw_morsel_receivers, morsel_stream_task_handle) + } + + /// Constructs the task that fetches file metadata. + /// Note: This must be called AFTER `self.projected_arrow_fields` has been initialized. + /// + /// TODO: During IR conversion the metadata of the first file is already downloaded - see if + /// we can find a way to re-use it. 
+ #[allow(clippy::type_complexity)] + fn init_metadata_fetcher( + &self, + ) -> ( + tokio::sync::oneshot::Receiver>, + NotifyReceiver<(usize, usize, Arc, FileMetaData, usize)>, + task_handles_ext::AbortOnDropHandle>, + ) { + let verbose = self.verbose; + let io_runtime = polars_io::pl_async::get_runtime(); + + assert!( + !self.projected_arrow_fields.is_empty() + || self.file_options.with_columns.as_deref() == Some(&[]) + ); + let projected_arrow_fields = self.projected_arrow_fields.clone(); + let needs_max_row_group_height_calc = + self.file_options.include_file_paths.is_some() || self.hive_parts.is_some(); + + let (normalized_slice_oneshot_tx, normalized_slice_oneshot_rx) = + tokio::sync::oneshot::channel(); + let (metadata_tx, mut metadata_notify_rx, metadata_rx) = notify_channel(); + + let byte_source_builder = self.byte_source_builder.clone(); + + if self.verbose { + eprintln!( + "[ParquetSource]: Byte source builder: {:?}", + &byte_source_builder + ); + } + + let fetch_metadata_bytes_for_path_index = { + let paths = &self.paths; + let cloud_options = Arc::new(self.cloud_options.clone()); + + let paths = paths.clone(); + let cloud_options = cloud_options.clone(); + let byte_source_builder = byte_source_builder.clone(); + + move |path_idx: usize| { + let paths = paths.clone(); + let cloud_options = cloud_options.clone(); + let byte_source_builder = byte_source_builder.clone(); + + let handle = io_runtime.spawn(async move { + let mut byte_source = Arc::new( + byte_source_builder + .try_build_from_path( + paths[path_idx].to_str().unwrap(), + cloud_options.as_ref().as_ref(), + ) + .await?, + ); + let (metadata_bytes, maybe_full_bytes) = + read_parquet_metadata_bytes(byte_source.as_ref(), verbose).await?; + + if let Some(v) = maybe_full_bytes { + if !matches!(byte_source.as_ref(), DynByteSource::MemSlice(_)) { + if verbose { + eprintln!( + "[ParquetSource]: Parquet file was fully fetched during \ + metadata read ({} bytes).", + v.len(), + ); + } + + byte_source = Arc::new(DynByteSource::from(MemSliceByteSource(v))) + } + } + + PolarsResult::Ok((path_idx, byte_source, metadata_bytes)) + }); + + let handle = task_handles_ext::AbortOnDropHandle(handle); + + std::future::ready(handle) + } + }; + + let process_metadata_bytes = { + move |handle: task_handles_ext::AbortOnDropHandle< + PolarsResult<(usize, Arc, MemSlice)>, + >| { + let projected_arrow_fields = projected_arrow_fields.clone(); + // Run on CPU runtime - metadata deserialization is expensive, especially + // for very wide tables. 
+ let handle = async_executor::spawn(TaskPriority::Low, async move { + let (path_index, byte_source, metadata_bytes) = handle.await.unwrap()?; + + let metadata = polars_parquet::parquet::read::deserialize_metadata( + metadata_bytes.as_ref(), + metadata_bytes.len() * 2 + 1024, + )?; + + ensure_metadata_has_projected_fields( + projected_arrow_fields.as_ref(), + &metadata, + )?; + + let file_max_row_group_height = if needs_max_row_group_height_calc { + metadata + .row_groups + .iter() + .map(|x| x.num_rows()) + .max() + .unwrap_or(0) + } else { + 0 + }; + + PolarsResult::Ok((path_index, byte_source, metadata, file_max_row_group_height)) + }); + + async_executor::AbortOnDropHandle::new(handle) + } + }; + + let metadata_prefetch_size = self.config.metadata_prefetch_size; + let metadata_decode_ahead_size = self.config.metadata_decode_ahead_size; + + let metadata_task_handle = if self + .file_options + .slice + .map(|(offset, _)| offset >= 0) + .unwrap_or(true) + { + normalized_slice_oneshot_tx + .send( + self.file_options + .slice + .map(|(offset, len)| (offset as usize, len)), + ) + .unwrap(); + + // Safety: `offset + len` does not overflow. + let slice_range = self + .file_options + .slice + .map(|(offset, len)| offset as usize..offset as usize + len); + + let mut metadata_stream = futures::stream::iter(0..self.paths.len()) + .map(fetch_metadata_bytes_for_path_index) + .buffered(metadata_prefetch_size) + .map(process_metadata_bytes) + .buffered(metadata_decode_ahead_size); + + let paths = self.paths.clone(); + + // We need to be able to both stop early as well as skip values, which is easier to do + // using a custom task instead of futures::stream + io_runtime.spawn(async move { + let current_row_offset_ref = &mut 0usize; + let current_path_index_ref = &mut 0usize; + + 'main: while metadata_notify_rx.recv().await.is_some() { + loop { + let current_path_index = *current_path_index_ref; + *current_path_index_ref += 1; + + let Some(v) = metadata_stream.next().await else { + break 'main; + }; + + let (path_index, byte_source, metadata, file_max_row_group_height) = v + .map_err(|err| { + err.wrap_msg(|msg| { + format!( + "error at path (index: {}, path: {}): {}", + current_path_index, + paths[current_path_index].to_str().unwrap(), + msg + ) + }) + })?; + + assert_eq!(path_index, current_path_index); + + let current_row_offset = *current_row_offset_ref; + *current_row_offset_ref = + current_row_offset.saturating_add(metadata.num_rows); + + if let Some(slice_range) = slice_range.clone() { + match SplitSlicePosition::split_slice_at_file( + current_row_offset, + metadata.num_rows, + slice_range, + ) { + SplitSlicePosition::Before => { + if verbose { + eprintln!( + "[ParquetSource]: Slice pushdown: \ + Skipped file at index {} ({} rows)", + current_path_index, metadata.num_rows + ); + } + continue; + }, + SplitSlicePosition::After => unreachable!(), + SplitSlicePosition::Overlapping(..) 
=> {}, + }; + }; + + { + use tokio::sync::mpsc::error::*; + match metadata_tx.try_send(( + path_index, + current_row_offset, + byte_source, + metadata, + file_max_row_group_height, + )) { + Err(TrySendError::Closed(_)) => break 'main, + Ok(_) => {}, + Err(TrySendError::Full(_)) => unreachable!(), + } + } + + if let Some(slice_range) = slice_range.as_ref() { + if *current_row_offset_ref >= slice_range.end { + if verbose { + eprintln!( + "[ParquetSource]: Slice pushdown: \ + Stopped reading at file at index {} \ + (remaining {} files will not be read)", + current_path_index, + paths.len() - current_path_index - 1, + ); + } + break 'main; + } + }; + + break; + } + } + + Ok(()) + }) + } else { + // Walk the files in reverse to translate the slice into a positive offset. + let slice = self.file_options.slice.unwrap(); + let slice_start_as_n_from_end = -slice.0 as usize; + + let mut metadata_stream = futures::stream::iter((0..self.paths.len()).rev()) + .map(fetch_metadata_bytes_for_path_index) + .buffered(metadata_prefetch_size) + .map(process_metadata_bytes) + .buffered(metadata_decode_ahead_size); + + // Note: + // * We want to wait until the first morsel is requested before starting this + let init_negative_slice_and_metadata = async move { + let mut processed_metadata_rev = vec![]; + let mut cum_rows = 0; + + while let Some(v) = metadata_stream.next().await { + let v = v?; + let (_, _, metadata, _) = &v; + cum_rows += metadata.num_rows; + processed_metadata_rev.push(v); + + if cum_rows >= slice_start_as_n_from_end { + break; + } + } + + let (start, len) = if slice_start_as_n_from_end > cum_rows { + // We need to trim the slice, e.g. SLICE[offset: -100, len: 75] on a file of 50 + // rows should only give the first 25 rows. + let first_file_position = slice_start_as_n_from_end - cum_rows; + (0, slice.1.saturating_sub(first_file_position)) + } else { + (cum_rows - slice_start_as_n_from_end, slice.1) + }; + + if len == 0 { + processed_metadata_rev.clear(); + } + + normalized_slice_oneshot_tx + .send(Some((start, len))) + .unwrap(); + + let slice_range = start..(start + len); + + PolarsResult::Ok((slice_range, processed_metadata_rev, cum_rows)) + }; + + let path_count = self.paths.len(); + + io_runtime.spawn(async move { + // Wait for the first morsel request before we call `init_negative_slice_and_metadata` + // This also means the receiver must `recv()` once before awaiting on the + // `normalized_slice_oneshot_rx` to avoid hanging. + if metadata_notify_rx.recv().await.is_none() { + return Ok(()); + } + + let (slice_range, processed_metadata_rev, cum_rows) = + async_executor::AbortOnDropHandle::new(async_executor::spawn( + TaskPriority::Low, + init_negative_slice_and_metadata, + )) + .await?; + + if verbose { + if let Some((path_index, ..)) = processed_metadata_rev.last() { + eprintln!( + "[ParquetSource]: Slice pushdown: Negatively-offsetted slice {:?} \ + begins at file index {}, translated to {:?}", + slice, path_index, slice_range + ); + } else { + eprintln!( + "[ParquetSource]: Slice pushdown: Negatively-offsetted slice {:?} \ + skipped all files ({} files containing {} rows)", + slice, path_count, cum_rows + ) + } + } + + let mut metadata_iter = processed_metadata_rev.into_iter().rev(); + let current_row_offset_ref = &mut 0usize; + + // do-while: We already consumed a notify above. 
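The do-while shape described above (the first notification is consumed by the caller, then each iteration emits one item and only continues once another notification arrives) can be sketched with a plain channel; the channel of `()` stands in for the morsel-request notifications and is purely illustrative:

use std::sync::mpsc;

fn main() {
    let (tx, rx) = mpsc::channel();
    for _ in 0..3 {
        tx.send(()).unwrap(); // three "morsel requested" notifications
    }
    drop(tx);

    // The caller already consumed the first notification before entering.
    let _ = rx.recv();

    let mut emitted = 0;
    loop {
        // Loop body: emit the next item.
        emitted += 1;

        // do-while: only continue once another notification arrives.
        if rx.recv().is_err() {
            break;
        }
    }
    assert_eq!(emitted, 3);
}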
+ loop { + let Some(( + current_path_index, + byte_source, + metadata, + file_max_row_group_height, + )) = metadata_iter.next() + else { + break; + }; + + let current_row_offset = *current_row_offset_ref; + *current_row_offset_ref = current_row_offset.saturating_add(metadata.num_rows); + + assert!(matches!( + SplitSlicePosition::split_slice_at_file( + current_row_offset, + metadata.num_rows, + slice_range.clone(), + ), + SplitSlicePosition::Overlapping(..) + )); + + { + use tokio::sync::mpsc::error::*; + match metadata_tx.try_send(( + current_path_index, + current_row_offset, + byte_source, + metadata, + file_max_row_group_height, + )) { + Err(TrySendError::Closed(_)) => break, + Ok(v) => v, + Err(TrySendError::Full(_)) => unreachable!(), + } + } + + if *current_row_offset_ref >= slice_range.end { + if verbose { + eprintln!( + "[ParquetSource]: Slice pushdown: \ + Stopped reading at file at index {} \ + (remaining {} files will not be read)", + current_path_index, + path_count - current_path_index - 1, + ); + } + break; + } + + if metadata_notify_rx.recv().await.is_none() { + break; + } + } + + Ok(()) + }) + }; + + let metadata_task_handle = task_handles_ext::AbortOnDropHandle(metadata_task_handle); + + ( + normalized_slice_oneshot_rx, + metadata_rx, + metadata_task_handle, + ) + } + + /// Creates a `RowGroupDecoder` that turns `RowGroupData` into DataFrames. + /// This must be called AFTER the following have been initialized: + /// * `self.projected_arrow_fields` + /// * `self.physical_predicate` + fn init_row_group_decoder(&self) -> RowGroupDecoder { + assert!( + !self.projected_arrow_fields.is_empty() + || self.file_options.with_columns.as_deref() == Some(&[]) + ); + assert_eq!(self.predicate.is_some(), self.physical_predicate.is_some()); + + let paths = self.paths.clone(); + let hive_partitions = self.hive_parts.clone(); + let hive_partitions_width = hive_partitions + .as_deref() + .map(|x| x[0].get_statistics().column_stats().len()) + .unwrap_or(0); + let include_file_paths = self.file_options.include_file_paths.clone(); + let projected_arrow_fields = self.projected_arrow_fields.clone(); + let row_index = self.file_options.row_index.clone(); + let physical_predicate = self.physical_predicate.clone(); + let ideal_morsel_size = get_ideal_morsel_size(); + + RowGroupDecoder { + paths, + hive_partitions, + hive_partitions_width, + include_file_paths, + projected_arrow_fields, + row_index, + physical_predicate, + ideal_morsel_size, + } + } + + fn init_projected_arrow_fields(&mut self) { + let reader_schema = self + .file_info + .reader_schema + .as_ref() + .unwrap() + .as_ref() + .unwrap_left() + .clone(); + + self.projected_arrow_fields = + if let Some(columns) = self.file_options.with_columns.as_deref() { + columns + .iter() + .map(|x| { + // `index_of` on ArrowSchema is slow, so we use the polars native Schema, + // but we need to remember to subtact the row index. 
+ let pos = self.file_info.schema.index_of(x.as_str()).unwrap() + - (self.file_options.row_index.is_some() as usize); + reader_schema.fields[pos].clone() + }) + .collect() + } else { + Arc::from(reader_schema.fields.as_slice()) + }; + + if self.verbose { + eprintln!( + "[ParquetSource]: {} columns to be projected from {} files", + self.projected_arrow_fields.len(), + self.paths.len(), + ); + } + } +} + +#[derive(Debug)] +struct Config { + num_pipelines: usize, + /// Number of files to pre-fetch metadata for concurrently + metadata_prefetch_size: usize, + /// Number of files to decode metadata for in parallel in advance + metadata_decode_ahead_size: usize, + /// Number of row groups to pre-fetch concurrently, this can be across files + row_group_prefetch_size: usize, +} + +/// Represents byte-data that can be transformed into a DataFrame after some computation. +struct RowGroupData { + byte_source: FetchedBytes, + path_index: usize, + row_offset: usize, + slice: Option<(usize, usize)>, + file_max_row_group_height: usize, + row_group_metadata: RowGroupMetaData, + shared_file_state: Arc>, +} + +struct RowGroupDataFetcher { + metadata_rx: NotifyReceiver<(usize, usize, Arc, FileMetaData, usize)>, + use_statistics: bool, + verbose: bool, + reader_schema: Arc, + projection: Option>, + predicate: Option>, + slice_range: Option>, + memory_prefetch_func: fn(&[u8]) -> (), + current_path_index: usize, + current_byte_source: Arc, + current_row_groups: std::vec::IntoIter, + current_row_group_idx: usize, + current_max_row_group_height: usize, + current_row_offset: usize, + current_shared_file_state: Arc>, +} + +fn read_this_row_group( + rg_md: &RowGroupMetaData, + predicate: Option<&dyn PhysicalIoExpr>, + reader_schema: &ArrowSchema, +) -> PolarsResult { + let Some(pred) = predicate else { + return Ok(true); + }; + use polars_io::prelude::_internal::*; + // TODO! + // Optimize this. Now we partition the predicate columns twice. (later on reading as well) + // I think we must add metadata context where we can cache and amortize the partitioning. + let mut part_md = PartitionedColumnChunkMD::new(rg_md); + let live = pred.live_variables(); + part_md.set_partitions( + live.as_ref() + .map(|vars| vars.iter().map(|s| s.as_ref()).collect::>()) + .as_ref(), + ); + read_this_row_group(Some(pred), &part_md, reader_schema) +} + +impl RowGroupDataFetcher { + fn into_stream(self) -> RowGroupDataStream { + RowGroupDataStream::new(self) + } + + async fn init_next_file_state(&mut self) -> bool { + let Some((path_index, row_offset, byte_source, metadata, file_max_row_group_height)) = + self.metadata_rx.recv().await + else { + return false; + }; + + self.current_path_index = path_index; + self.current_byte_source = byte_source; + self.current_max_row_group_height = file_max_row_group_height; + // The metadata task also sends a row offset to start counting from as it may skip files + // during slice pushdown. 
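A self-contained sketch of the slice-splitting logic relied on throughout this file: given a file's (or row group's) starting row offset and height, classify a global slice as lying before it, after it, or overlapping it. This is modelled on how `SplitSlicePosition::split_slice_at_file` is used here; the enum and function below are illustrative and the real helper may handle edge cases differently:

use std::ops::Range;

#[derive(Debug, PartialEq)]
enum SlicePos {
    Before,                    // the file lies entirely before the slice
    Overlapping(usize, usize), // (offset within the file, length)
    After,                     // the file lies entirely after the slice
}

fn split_slice_at_file(file_row_offset: usize, file_n_rows: usize, slice: Range<usize>) -> SlicePos {
    let file_end = file_row_offset + file_n_rows;
    if file_end <= slice.start {
        SlicePos::Before
    } else if file_row_offset >= slice.end {
        SlicePos::After
    } else {
        let start = slice.start.max(file_row_offset);
        let end = slice.end.min(file_end);
        SlicePos::Overlapping(start - file_row_offset, end - start)
    }
}

fn main() {
    // Three files of 100 rows each, global slice covering rows 150..250.
    assert_eq!(split_slice_at_file(0, 100, 150..250), SlicePos::Before);
    assert_eq!(split_slice_at_file(100, 100, 150..250), SlicePos::Overlapping(50, 50));
    assert_eq!(split_slice_at_file(300, 100, 150..250), SlicePos::After);
}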
+ self.current_row_offset = row_offset; + self.current_row_group_idx = 0; + self.current_row_groups = metadata.row_groups.into_iter(); + self.current_shared_file_state = Default::default(); + + true + } + + async fn next( + &mut self, + ) -> Option>>> { + 'main: loop { + for row_group_metadata in self.current_row_groups.by_ref() { + let current_row_offset = self.current_row_offset; + let current_row_group_idx = self.current_row_group_idx; + + let num_rows = row_group_metadata.num_rows(); + + self.current_row_offset = current_row_offset.saturating_add(num_rows); + self.current_row_group_idx += 1; + + if self.use_statistics + && !match read_this_row_group( + &row_group_metadata, + self.predicate.as_deref(), + self.reader_schema.as_ref(), + ) { + Ok(v) => v, + Err(e) => return Some(Err(e)), + } + { + if self.verbose { + eprintln!( + "[ParquetSource]: Predicate pushdown: \ + Skipped row group {} in file {} ({} rows)", + current_row_group_idx, self.current_path_index, num_rows + ); + } + continue; + } + + if num_rows > IdxSize::MAX as usize { + let msg = operation_exceeded_idxsize_msg( + format!("number of rows in row group ({})", num_rows).as_str(), + ); + return Some(Err(polars_err!(ComputeError: msg))); + } + + let slice = if let Some(slice_range) = self.slice_range.clone() { + let (offset, len) = match SplitSlicePosition::split_slice_at_file( + current_row_offset, + num_rows, + slice_range, + ) { + SplitSlicePosition::Before => { + if self.verbose { + eprintln!( + "[ParquetSource]: Slice pushdown: \ + Skipped row group {} in file {} ({} rows)", + current_row_group_idx, self.current_path_index, num_rows + ); + } + continue; + }, + SplitSlicePosition::After => { + if self.verbose { + eprintln!( + "[ParquetSource]: Slice pushdown: \ + Stop at row group {} in file {} \ + (remaining {} row groups will not be read)", + current_row_group_idx, + self.current_path_index, + self.current_row_groups.len(), + ); + }; + break 'main; + }, + SplitSlicePosition::Overlapping(offset, len) => (offset, len), + }; + + Some((offset, len)) + } else { + None + }; + + let current_byte_source = self.current_byte_source.clone(); + let projection = self.projection.clone(); + let current_shared_file_state = self.current_shared_file_state.clone(); + let memory_prefetch_func = self.memory_prefetch_func; + let io_runtime = polars_io::pl_async::get_runtime(); + let current_path_index = self.current_path_index; + let current_max_row_group_height = self.current_max_row_group_height; + + // Push calculation of byte ranges to a task to run in parallel, as it can be + // expensive for very wide tables and projections. + let handle = async_executor::spawn(TaskPriority::Low, async move { + let byte_source = if let DynByteSource::MemSlice(mem_slice) = + current_byte_source.as_ref() + { + // Skip byte range calculation for `no_prefetch`. 
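In the prefetch and full-projection paths below, the per-column byte ranges of a row group are folded into a single covering range so that one read (or one prefetch) spans the whole row group. A standalone version of that fold; `covering_range` is an illustrative name, not an existing helper:

use std::ops::Range;

// Collapse per-column byte ranges into one covering range, as done below
// before prefetching / fetching an entire row group in a single request.
fn covering_range(mut ranges: impl Iterator<Item = Range<usize>>) -> Option<Range<usize>> {
    let first = ranges.next()?;
    Some(ranges.fold(first, |l, r| l.start.min(r.start)..l.end.max(r.end)))
}

fn main() {
    let ranges = [4..100, 100..250, 250..300].into_iter();
    assert_eq!(covering_range(ranges), Some(4..300));
    assert_eq!(covering_range(std::iter::empty::<Range<usize>>()), None);
}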
+ if memory_prefetch_func as usize != mem_prefetch_funcs::no_prefetch as usize + { + let slice = mem_slice.0.as_ref(); + + if let Some(columns) = projection.as_ref() { + for range in get_row_group_byte_ranges_for_projection( + &row_group_metadata, + columns.as_ref(), + ) { + memory_prefetch_func(unsafe { + slice.get_unchecked_release(range) + }) + } + } else { + let mut iter = get_row_group_byte_ranges(&row_group_metadata); + let first = iter.next().unwrap(); + let range = + iter.fold(first, |l, r| l.start.min(r.start)..l.end.max(r.end)); + + memory_prefetch_func(unsafe { slice.get_unchecked_release(range) }) + }; + } + + // We have a mmapped or in-memory slice representing the entire + // file that can be sliced directly, so we can skip the byte-range + // calculations and HashMap allocation. + let mem_slice = mem_slice.0.clone(); + FetchedBytes::MemSlice { + offset: 0, + mem_slice, + } + } else if let Some(columns) = projection.as_ref() { + let ranges = get_row_group_byte_ranges_for_projection( + &row_group_metadata, + columns.as_ref(), + ) + .collect::>(); + + let bytes = { + let ranges_2 = ranges.clone(); + task_handles_ext::AbortOnDropHandle(io_runtime.spawn(async move { + current_byte_source.get_ranges(ranges_2.as_ref()).await + })) + .await + .unwrap()? + }; + + assert_eq!(bytes.len(), ranges.len()); + + let mut bytes_map = PlHashMap::with_capacity(ranges.len()); + + for (range, bytes) in ranges.iter().zip(bytes) { + memory_prefetch_func(bytes.as_ref()); + let v = bytes_map.insert(range.start, bytes); + debug_assert!(v.is_none(), "duplicate range start {}", range.start); + } + + FetchedBytes::BytesMap(bytes_map) + } else { + // We have a dedicated code-path for a full projection that performs a + // single range request for the entire row group. During testing this + // provided much higher throughput from cloud than making multiple range + // request with `get_ranges()`. + let mut iter = get_row_group_byte_ranges(&row_group_metadata); + let mut ranges = Vec::with_capacity(iter.len()); + let first = iter.next().unwrap(); + ranges.push(first.clone()); + let full_range = iter.fold(first, |l, r| { + ranges.push(r.clone()); + l.start.min(r.start)..l.end.max(r.end) + }); + + let mem_slice = { + let full_range_2 = full_range.clone(); + task_handles_ext::AbortOnDropHandle(io_runtime.spawn(async move { + current_byte_source.get_range(full_range_2).await + })) + .await + .unwrap()? + }; + + FetchedBytes::MemSlice { + offset: full_range.start, + mem_slice, + } + }; + + PolarsResult::Ok(RowGroupData { + byte_source, + path_index: current_path_index, + row_offset: current_row_offset, + slice, + file_max_row_group_height: current_max_row_group_height, + row_group_metadata, + shared_file_state: current_shared_file_state.clone(), + }) + }); + + let handle = async_executor::AbortOnDropHandle::new(handle); + return Some(Ok(handle)); + } + + // Initialize state to the next file. 
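The comment above refers to the overall shape of `next()`: an inner loop over the current file's row groups, with an outer step that advances to the next file when the inner iterator is exhausted. A toy version of that state machine; the `Fetcher` type and string row groups are purely illustrative:

// Each "file" is reduced to a list of named row groups.
struct Fetcher {
    files: std::vec::IntoIter<Vec<&'static str>>,
    current: std::vec::IntoIter<&'static str>,
}

impl Fetcher {
    fn next_row_group(&mut self) -> Option<&'static str> {
        loop {
            // Inner loop: drain the current file's row groups.
            if let Some(rg) = self.current.next() {
                return Some(rg);
            }
            // Initialize state to the next file, or stop when none are left.
            self.current = self.files.next()?.into_iter();
        }
    }
}

fn main() {
    let files = vec![vec!["f0/rg0", "f0/rg1"], vec!["f1/rg0"]];
    let mut fetcher = Fetcher {
        files: files.into_iter(),
        current: Vec::new().into_iter(),
    };
    let mut seen = Vec::new();
    while let Some(rg) = fetcher.next_row_group() {
        seen.push(rg);
    }
    assert_eq!(seen, ["f0/rg0", "f0/rg1", "f1/rg0"]);
}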
+ if !self.init_next_file_state().await { + break; + } + } + + None + } +} + +enum FetchedBytes { + MemSlice { mem_slice: MemSlice, offset: usize }, + BytesMap(PlHashMap), +} + +impl FetchedBytes { + fn get_range(&self, range: std::ops::Range) -> MemSlice { + match self { + Self::MemSlice { mem_slice, offset } => { + let offset = *offset; + debug_assert!(range.start >= offset); + mem_slice.slice(range.start - offset..range.end - offset) + }, + Self::BytesMap(v) => { + let v = v.get(&range.start).unwrap(); + debug_assert_eq!(v.len(), range.len()); + v.clone() + }, + } + } +} + +#[rustfmt::skip] +type RowGroupDataStreamFut = std::pin::Pin , + Option < + PolarsResult < + async_executor::AbortOnDropHandle < + PolarsResult < + RowGroupData > > > > + ) + > + Send +>>; + +struct RowGroupDataStream { + current_future: RowGroupDataStreamFut, +} + +impl RowGroupDataStream { + fn new(row_group_data_fetcher: RowGroupDataFetcher) -> Self { + // [`RowGroupDataFetcher`] is a big struct, so we Box it once here to avoid boxing it on + // every `next()` call. + let current_future = Self::call_next_owned(Box::new(row_group_data_fetcher)); + Self { current_future } + } + + fn call_next_owned( + mut row_group_data_fetcher: Box, + ) -> RowGroupDataStreamFut { + Box::pin(async move { + let out = row_group_data_fetcher.next().await; + (row_group_data_fetcher, out) + }) + } +} + +impl futures::stream::Stream for RowGroupDataStream { + type Item = PolarsResult>>; + + fn poll_next( + mut self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + use std::pin::Pin; + use std::task::Poll; + + match Pin::new(&mut self.current_future.as_mut()).poll(cx) { + Poll::Ready((row_group_data_fetcher, out)) => { + if out.is_some() { + self.current_future = Self::call_next_owned(row_group_data_fetcher); + } + + Poll::Ready(out) + }, + Poll::Pending => Poll::Pending, + } + } +} + +/// State shared across row groups for a single file. +struct SharedFileState { + path_index: usize, + hive_series: Vec, + file_path_series: Option, +} + +/// Turns row group data into DataFrames. +struct RowGroupDecoder { + paths: Arc>, + hive_partitions: Option>>, + hive_partitions_width: usize, + include_file_paths: Option>, + projected_arrow_fields: Arc<[polars_core::prelude::ArrowField]>, + row_index: Option, + physical_predicate: Option>, + ideal_morsel_size: usize, +} + +impl RowGroupDecoder { + async fn row_group_data_to_df( + &self, + row_group_data: RowGroupData, + ) -> PolarsResult> { + let row_group_data = Arc::new(row_group_data); + + let out_width = self.row_index.is_some() as usize + + self.projected_arrow_fields.len() + + self.hive_partitions_width + + self.include_file_paths.is_some() as usize; + + let mut out_columns = Vec::with_capacity(out_width); + + if self.row_index.is_some() { + // Add a placeholder so that we don't have to shift the entire vec + // later. + out_columns.push(Series::default()); + } + + let slice_range = row_group_data + .slice + .map(|(offset, len)| offset..offset + len) + .unwrap_or(0..row_group_data.row_group_metadata.num_rows()); + + let projected_arrow_fields = &self.projected_arrow_fields; + let projected_arrow_fields = projected_arrow_fields.clone(); + + let row_group_data_2 = row_group_data.clone(); + let slice_range_2 = slice_range.clone(); + + // Minimum number of values to amortize the overhead of spawning tasks. + // This value is arbitrarily chosen. 
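The constant defined just below sizes the per-task column chunks so that each spawned decode task touches roughly `VALUES_PER_THREAD` cells. A worked version of that arithmetic; the row counts are made up, and `n_rows` is assumed non-zero since it comes from row-group metadata:

const VALUES_PER_THREAD: usize = 16_777_216;

// Columns are decoded in chunks of `cols_per_task`, sized so each spawned
// task touches at least roughly VALUES_PER_THREAD cells.
fn cols_per_task(n_rows: usize) -> usize {
    1 + VALUES_PER_THREAD / n_rows
}

fn main() {
    // Tall row group: one column per task already amortizes the spawn cost.
    assert_eq!(cols_per_task(50_000_000), 1);
    // Short row group: many columns share a single task.
    assert_eq!(cols_per_task(1_000), 16_778);
}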
+ const VALUES_PER_THREAD: usize = 16_777_216; + let n_rows = row_group_data.row_group_metadata.num_rows(); + let cols_per_task = 1 + VALUES_PER_THREAD / n_rows; + + let decode_fut_iter = (0..self.projected_arrow_fields.len()) + .step_by(cols_per_task) + .map(move |offset| { + let row_group_data = row_group_data_2.clone(); + let slice_range = slice_range_2.clone(); + let projected_arrow_fields = projected_arrow_fields.clone(); + + async move { + (offset + ..offset + .saturating_add(cols_per_task) + .min(projected_arrow_fields.len())) + .map(|i| { + let arrow_field = projected_arrow_fields[i].clone(); + + let columns_to_deserialize = row_group_data + .row_group_metadata + .columns() + .iter() + .filter(|col_md| { + col_md.descriptor().path_in_schema[0] == arrow_field.name + }) + .map(|col_md| { + let (offset, len) = col_md.byte_range(); + let offset = offset as usize; + let len = len as usize; + + ( + col_md, + row_group_data.byte_source.get_range(offset..offset + len), + ) + }) + .collect::>(); + + assert!( + slice_range.end <= row_group_data.row_group_metadata.num_rows() + ); + + let array = polars_io::prelude::_internal::to_deserializer( + columns_to_deserialize, + arrow_field.clone(), + Some(polars_parquet::read::Filter::Range(slice_range.clone())), + )?; + + let series = Series::try_from((&arrow_field, array))?; + + // TODO: Also load in the metadata. + + PolarsResult::Ok(series) + }) + .collect::>>() + } + }); + + if decode_fut_iter.len() > 1 { + for handle in decode_fut_iter.map(|fut| { + async_executor::AbortOnDropHandle::new(async_executor::spawn( + TaskPriority::Low, + fut, + )) + }) { + out_columns.extend(handle.await?); + } + } else { + for fut in decode_fut_iter { + out_columns.extend(fut.await?); + } + } + + let projection_height = if self.projected_arrow_fields.is_empty() { + slice_range.len() + } else { + debug_assert!(out_columns.len() > self.row_index.is_some() as usize); + out_columns.last().unwrap().len() + }; + + if let Some(RowIndex { name, offset }) = self.row_index.as_ref() { + let Some(offset) = (|| { + let offset = offset + .checked_add((row_group_data.row_offset + slice_range.start) as IdxSize)?; + offset.checked_add(projection_height as IdxSize)?; + + Some(offset) + })() else { + let msg = format!( + "adding a row index column with offset {} overflows at {} rows", + offset, + row_group_data.row_offset + slice_range.end + ); + polars_bail!(ComputeError: msg) + }; + + // The DataFrame can be empty at this point if no columns were projected from the file, + // so we create the row index column manually instead of using `df.with_row_index` to + // ensure it has the correct number of rows. 
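A standalone version of the two steps around this point: validate with checked arithmetic that the row index stays representable, then materialize the ascending values directly so the column has the right length even when no file columns were projected. `IdxSize` is assumed to be `u32` here (the default build); the real code builds an `IdxCa` and sets its sorted flag rather than returning a `Vec`:

type IdxSize = u32; // default builds; bigidx builds use u64

// Validate that the row index fits in IdxSize, then materialize the ascending
// values directly so the column has `height` rows even with no file columns.
fn make_row_index(offset: IdxSize, row_offset: usize, height: usize) -> Option<Vec<IdxSize>> {
    let start = offset.checked_add(IdxSize::try_from(row_offset).ok()?)?;
    let end = start.checked_add(IdxSize::try_from(height).ok()?)?;
    Some((start..end).collect())
}

fn main() {
    assert_eq!(make_row_index(10, 5, 3), Some(vec![15, 16, 17]));
    assert_eq!(make_row_index(u32::MAX - 1, 5, 1), None); // would overflow IdxSize
}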
+ let mut ca = IdxCa::from_vec( + name, + (offset..offset + projection_height as IdxSize).collect(), + ); + ca.set_sorted_flag(IsSorted::Ascending); + + out_columns[0] = ca.into_series(); + } + + let shared_file_state = row_group_data + .shared_file_state + .get_or_init(|| async { + let path_index = row_group_data.path_index; + + let hive_series = if let Some(hp) = self.hive_partitions.as_deref() { + let mut v = hp[path_index].materialize_partition_columns(); + for s in v.iter_mut() { + *s = s.new_from_index(0, row_group_data.file_max_row_group_height); + } + v + } else { + vec![] + }; + + let file_path_series = self.include_file_paths.as_deref().map(|file_path_col| { + StringChunked::full( + file_path_col, + self.paths[path_index].to_str().unwrap(), + row_group_data.file_max_row_group_height, + ) + .into_series() + }); + + SharedFileState { + path_index, + hive_series, + file_path_series, + } + }) + .await; + + assert_eq!(shared_file_state.path_index, row_group_data.path_index); + + for s in &shared_file_state.hive_series { + debug_assert!(s.len() >= projection_height); + out_columns.push(s.slice(0, projection_height)); + } + + if let Some(file_path_series) = &shared_file_state.file_path_series { + debug_assert!(file_path_series.len() >= projection_height); + out_columns.push(file_path_series.slice(0, projection_height)); + } + + let df = unsafe { DataFrame::new_no_checks(out_columns) }; + + // Re-calculate: A slice may have been applied. + let cols_per_task = 1 + VALUES_PER_THREAD / df.height(); + + let df = if let Some(predicate) = self.physical_predicate.as_deref() { + let mask = predicate.evaluate_io(&df)?; + let mask = mask.bool().unwrap(); + + if cols_per_task <= df.width() { + df._filter_seq(mask)? + } else { + let mask = mask.clone(); + let cols = Arc::new(df.take_columns()); + let mut out_cols = Vec::with_capacity(cols.len()); + + for handle in (0..cols.len()) + .step_by(cols_per_task) + .map(move |offset| { + let cols = cols.clone(); + let mask = mask.clone(); + async move { + cols[offset..offset.saturating_add(cols_per_task).min(cols.len())] + .iter() + .map(|s| s.filter(&mask)) + .collect::>>() + } + }) + .map(|fut| { + async_executor::AbortOnDropHandle::new(async_executor::spawn( + TaskPriority::Low, + fut, + )) + }) + { + out_cols.extend(handle.await?); + } + + unsafe { DataFrame::new_no_checks(out_cols) } + } + } else { + df + }; + + assert_eq!(df.width(), out_width); + + let n_morsels = if df.height() > 3 * self.ideal_morsel_size / 2 { + // num_rows > (1.5 * ideal_morsel_size) + (df.height() / self.ideal_morsel_size).max(2) + } else { + 1 + } as u64; + + if n_morsels == 1 { + return Ok(vec![df]); + } + + let rows_per_morsel = 1 + df.height() / n_morsels as usize; + + let out = (0..i64::try_from(df.height()).unwrap()) + .step_by(rows_per_morsel) + .map(|offset| df.slice(offset, rows_per_morsel)) + .collect::>(); + + Ok(out) + } +} + +/// Read the metadata bytes of a parquet file, does not decode the bytes. If during metadata fetch +/// the bytes of the entire file are loaded, it is returned in the second return value. 
+async fn read_parquet_metadata_bytes( + byte_source: &DynByteSource, + verbose: bool, +) -> PolarsResult<(MemSlice, Option)> { + use polars_parquet::parquet::error::ParquetError; + use polars_parquet::parquet::PARQUET_MAGIC; + + const FOOTER_HEADER_SIZE: usize = polars_parquet::parquet::FOOTER_SIZE as usize; + + let file_size = byte_source.get_size().await?; + + if file_size < FOOTER_HEADER_SIZE { + return Err(ParquetError::OutOfSpec(format!( + "file size ({}) is less than minimum size required to store parquet footer ({})", + file_size, FOOTER_HEADER_SIZE + )) + .into()); + } + + let estimated_metadata_size = if let DynByteSource::MemSlice(_) = byte_source { + // Mmapped or in-memory, reads are free. + file_size + } else { + (file_size / 2048).clamp(16_384, 131_072).min(file_size) + }; + + let bytes = byte_source + .get_range((file_size - estimated_metadata_size)..file_size) + .await?; + + let footer_header_bytes = bytes.slice((bytes.len() - FOOTER_HEADER_SIZE)..bytes.len()); + + let (v, remaining) = footer_header_bytes.split_at(4); + let footer_size = i32::from_le_bytes(v.try_into().unwrap()); + + if remaining != PARQUET_MAGIC { + return Err(ParquetError::OutOfSpec(format!( + r#"expected parquet magic bytes "{}" in footer, got "{}" instead"#, + std::str::from_utf8(&PARQUET_MAGIC).unwrap(), + String::from_utf8_lossy(remaining) + )) + .into()); + } + + if footer_size < 0 { + return Err(ParquetError::OutOfSpec(format!( + "expected positive footer size, got {} instead", + footer_size + )) + .into()); + } + + let footer_size = footer_size as usize + FOOTER_HEADER_SIZE; + + if file_size < footer_size { + return Err(ParquetError::OutOfSpec(format!( + "file size ({}) is less than the indicated footer size ({})", + file_size, footer_size + )) + .into()); + } + + if bytes.len() < footer_size { + debug_assert!(!matches!(byte_source, DynByteSource::MemSlice(_))); + if verbose { + eprintln!( + "[ParquetSource]: Extra {} bytes need to be fetched for metadata \ + (initial estimate = {}, actual size = {})", + footer_size - estimated_metadata_size, + bytes.len(), + footer_size, + ); + } + + let mut out = Vec::with_capacity(footer_size); + let offset = file_size - footer_size; + let len = footer_size - bytes.len(); + let delta_bytes = byte_source.get_range(offset..(offset + len)).await?; + + debug_assert!(out.capacity() >= delta_bytes.len() + bytes.len()); + + out.extend_from_slice(&delta_bytes); + out.extend_from_slice(&bytes); + + Ok((MemSlice::from_vec(out), None)) + } else { + if verbose && !matches!(byte_source, DynByteSource::MemSlice(_)) { + eprintln!( + "[ParquetSource]: Fetched all bytes for metadata on first try \ + (initial estimate = {}, actual size = {}, excess = {})", + bytes.len(), + footer_size, + estimated_metadata_size - footer_size, + ); + } + + let metadata_bytes = bytes.slice((bytes.len() - footer_size)..bytes.len()); + + if bytes.len() == file_size { + Ok((metadata_bytes, Some(bytes))) + } else { + debug_assert!(!matches!(byte_source, DynByteSource::MemSlice(_))); + let metadata_bytes = if bytes.len() - footer_size >= bytes.len() { + // Re-allocate to drop the excess bytes + MemSlice::from_vec(metadata_bytes.to_vec()) + } else { + metadata_bytes + }; + + Ok((metadata_bytes, None)) + } + } +} + +fn get_row_group_byte_ranges( + row_group_metadata: &RowGroupMetaData, +) -> impl ExactSizeIterator> + '_ { + let row_group_columns = row_group_metadata.columns(); + + row_group_columns.iter().map(|rg_col_metadata| { + let (offset, len) = rg_col_metadata.byte_range(); + (offset as 
usize)..(offset + len) as usize + }) +} + +/// TODO: This is quadratic - incorporate https://github.com/pola-rs/polars/pull/18327 that is +/// merged. +fn get_row_group_byte_ranges_for_projection<'a>( + row_group_metadata: &'a RowGroupMetaData, + columns: &'a [String], +) -> impl Iterator> + 'a { + let row_group_columns = row_group_metadata.columns(); + + row_group_columns.iter().filter_map(move |rg_col_metadata| { + for col_name in columns { + if &rg_col_metadata.descriptor().path_in_schema[0] == col_name { + let (offset, len) = rg_col_metadata.byte_range(); + let range = (offset as usize)..((offset + len) as usize); + return Some(range); + } + } + None + }) +} + +/// Ensures that a parquet file has all the necessary columns for a projection with the correct +/// dtype. There are no ordering requirements and extra columns are permitted. +fn ensure_metadata_has_projected_fields( + projected_fields: &[polars_core::prelude::ArrowField], + metadata: &FileMetaData, +) -> PolarsResult<()> { + let schema = polars_parquet::arrow::read::infer_schema(metadata)?; + + // Note: We convert to Polars-native dtypes for timezone normalization. + let mut schema = schema + .fields + .into_iter() + .map(|x| { + let dtype = DataType::from_arrow(&x.data_type, true); + (x.name, dtype) + }) + .collect::>(); + + for field in projected_fields { + let Some(dtype) = schema.remove(&field.name) else { + polars_bail!(SchemaMismatch: "did not find column: {}", field.name) + }; + + let expected_dtype = DataType::from_arrow(&field.data_type, true); + + if dtype != expected_dtype { + polars_bail!(SchemaMismatch: "data type mismatch for column {}: found: {}, expected: {}", + &field.name, dtype, expected_dtype + ) + } + } + + Ok(()) +} + +fn get_memory_prefetch_func(verbose: bool) -> fn(&[u8]) -> () { + let memory_prefetch_func = match std::env::var("POLARS_MEMORY_PREFETCH").ok().as_deref() { + None => { + // Sequential advice was observed to provide speedups on Linux. 
+ // ref https://github.com/pola-rs/polars/pull/18152#discussion_r1721701965 + #[cfg(target_os = "linux")] + { + mem_prefetch_funcs::madvise_sequential + } + #[cfg(not(target_os = "linux"))] + { + mem_prefetch_funcs::no_prefetch + } + }, + Some("no_prefetch") => mem_prefetch_funcs::no_prefetch, + Some("prefetch_l2") => mem_prefetch_funcs::prefetch_l2, + Some("madvise_sequential") => { + #[cfg(target_family = "unix")] + { + mem_prefetch_funcs::madvise_sequential + } + #[cfg(not(target_family = "unix"))] + { + panic!("POLARS_MEMORY_PREFETCH=madvise_sequential is not supported by this system"); + } + }, + Some("madvise_willneed") => { + #[cfg(target_family = "unix")] + { + mem_prefetch_funcs::madvise_willneed + } + #[cfg(not(target_family = "unix"))] + { + panic!("POLARS_MEMORY_PREFETCH=madvise_willneed is not supported by this system"); + } + }, + Some("madvise_populate_read") => { + #[cfg(target_os = "linux")] + { + mem_prefetch_funcs::madvise_populate_read + } + #[cfg(not(target_os = "linux"))] + { + panic!( + "POLARS_MEMORY_PREFETCH=madvise_populate_read is not supported by this system" + ); + } + }, + Some(v) => panic!("invalid value for POLARS_MEMORY_PREFETCH: {}", v), + }; + + if verbose { + let func_name = match memory_prefetch_func as usize { + v if v == mem_prefetch_funcs::no_prefetch as usize => "no_prefetch", + v if v == mem_prefetch_funcs::prefetch_l2 as usize => "prefetch_l2", + v if v == mem_prefetch_funcs::madvise_sequential as usize => "madvise_sequential", + v if v == mem_prefetch_funcs::madvise_willneed as usize => "madvise_willneed", + v if v == mem_prefetch_funcs::madvise_populate_read as usize => "madvise_populate_read", + _ => unreachable!(), + }; + + eprintln!("[ParquetSource] Memory prefetch function: {}", func_name); + } + + memory_prefetch_func +} + +mod mem_prefetch_funcs { + pub use polars_utils::mem::{ + madvise_populate_read, madvise_sequential, madvise_willneed, prefetch_l2, + }; + + pub fn no_prefetch(_: &[u8]) {} +} diff --git a/crates/polars-stream/src/nodes/reduce.rs b/crates/polars-stream/src/nodes/reduce.rs index 4dc4d859ba62..3b6c7b2bea62 100644 --- a/crates/polars-stream/src/nodes/reduce.rs +++ b/crates/polars-stream/src/nodes/reduce.rs @@ -2,6 +2,7 @@ use std::sync::Arc; use polars_core::schema::Schema; use polars_expr::reduce::Reduction; +use polars_utils::itertools::Itertools; use super::compute_node_prelude::*; use crate::expression::StreamExpr; @@ -97,7 +98,7 @@ impl ComputeNode for ReduceNode { "reduce" } - fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) { + fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) -> PolarsResult<()> { assert!(recv.len() == 1 && send.len() == 1); // State transitions. @@ -108,7 +109,6 @@ impl ComputeNode for ReduceNode { }, // Input is done, transition to being a source. ReduceState::Sink { reductions, .. } if matches!(recv[0], PortState::Done) => { - // TODO! make `update_state` fallible. 
let columns = reductions .iter_mut() .zip(self.output_schema.iter_fields()) @@ -117,9 +117,8 @@ impl ComputeNode for ReduceNode { scalar.into_series(&field.name).cast(&field.dtype).unwrap() }) }) - .collect::>>() - .unwrap(); - let out = unsafe { DataFrame::new_no_checks(columns) }; + .try_collect_vec()?; + let out = DataFrame::new(columns).unwrap(); self.state = ReduceState::Source(Some(out)); }, @@ -146,6 +145,7 @@ impl ComputeNode for ReduceNode { send[0] = PortState::Done; }, } + Ok(()) } fn spawn<'env, 's>( diff --git a/crates/polars-stream/src/nodes/select.rs b/crates/polars-stream/src/nodes/select.rs index 568351ee4f47..688580e10319 100644 --- a/crates/polars-stream/src/nodes/select.rs +++ b/crates/polars-stream/src/nodes/select.rs @@ -26,9 +26,10 @@ impl ComputeNode for SelectNode { "select" } - fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) { + fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) -> PolarsResult<()> { assert!(recv.len() == 1 && send.len() == 1); recv.swap_with_slice(send); + Ok(()) } fn spawn<'env, 's>( @@ -59,20 +60,7 @@ impl ComputeNode for SelectNode { out._add_columns(selected, &slf.schema)?; out } else { - // Broadcast scalars. - let max_non_unit_length = selected - .iter() - .map(|s| s.len()) - .filter(|l| *l != 1) - .max() - .unwrap_or(1); - for s in &mut selected { - if s.len() != max_non_unit_length { - assert!(s.len() == 1, "got series of incompatible lengths"); - *s = s.new_from_index(0, max_non_unit_length); - } - } - unsafe { DataFrame::new_no_checks(selected) } + DataFrame::new_with_broadcast(selected)? }; let mut morsel = Morsel::new(ret, seq, source_token); diff --git a/crates/polars-stream/src/nodes/simple_projection.rs b/crates/polars-stream/src/nodes/simple_projection.rs index 1a643b642e73..d4e82dde8ad8 100644 --- a/crates/polars-stream/src/nodes/simple_projection.rs +++ b/crates/polars-stream/src/nodes/simple_projection.rs @@ -23,9 +23,10 @@ impl ComputeNode for SimpleProjectionNode { "simple_projection" } - fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) { + fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) -> PolarsResult<()> { assert!(recv.len() == 1 && send.len() == 1); recv.swap_with_slice(send); + Ok(()) } fn spawn<'env, 's>( diff --git a/crates/polars-stream/src/nodes/streaming_slice.rs b/crates/polars-stream/src/nodes/streaming_slice.rs index b46693bac808..950b39331588 100644 --- a/crates/polars-stream/src/nodes/streaming_slice.rs +++ b/crates/polars-stream/src/nodes/streaming_slice.rs @@ -30,13 +30,14 @@ impl ComputeNode for StreamingSliceNode { self.num_pipelines = num_pipelines; } - fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) { + fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) -> PolarsResult<()> { if self.stream_offset >= self.start_offset + self.length || self.length == 0 { recv[0] = PortState::Done; send[0] = PortState::Done; } else { recv.swap_with_slice(send); } + Ok(()) } fn spawn<'env, 's>( diff --git a/crates/polars-stream/src/nodes/zip.rs b/crates/polars-stream/src/nodes/zip.rs index b5b860880a1b..ff1e336a178f 100644 --- a/crates/polars-stream/src/nodes/zip.rs +++ b/crates/polars-stream/src/nodes/zip.rs @@ -4,6 +4,7 @@ use std::sync::Arc; use polars_core::functions::concat_df_horizontal; use polars_core::schema::Schema; use polars_core::series::Series; +use polars_error::polars_ensure; use super::compute_node_prelude::*; use crate::morsel::SourceToken; @@ -138,7 
+139,7 @@ impl ComputeNode for ZipNode { "zip" } - fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) { + fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) -> PolarsResult<()> { assert!(send.len() == 1); assert!(recv.len() == self.input_heads.len()); @@ -167,9 +168,9 @@ impl ComputeNode for ZipNode { } if !self.null_extend { - assert!( + polars_ensure!( !(at_least_one_non_broadcast_done && at_least_one_non_broadcast_nonempty), - "zip received non-equal length inputs" + ShapeMismatch: "zip node received non-equal length inputs" ); } @@ -196,6 +197,7 @@ impl ComputeNode for ZipNode { for r in recv { *r = new_recv_state; } + Ok(()) } fn spawn<'env, 's>( diff --git a/crates/polars-stream/src/physical_plan/fmt.rs b/crates/polars-stream/src/physical_plan/fmt.rs new file mode 100644 index 000000000000..20aa1cf1486f --- /dev/null +++ b/crates/polars-stream/src/physical_plan/fmt.rs @@ -0,0 +1,199 @@ +use std::fmt::Write; + +use polars_plan::plans::expr_ir::ExprIR; +use polars_plan::plans::{AExpr, EscapeLabel, FileScan, PathsDisplay}; +use polars_utils::arena::Arena; +use polars_utils::itertools::Itertools; +use slotmap::{Key, SecondaryMap, SlotMap}; + +use super::{PhysNode, PhysNodeKey, PhysNodeKind}; + +fn escape_graphviz(s: &str) -> String { + s.replace('\\', "\\\\") + .replace('\n', "\\n") + .replace('"', "\\\"") +} + +fn fmt_exprs(exprs: &[ExprIR], expr_arena: &Arena) -> String { + exprs + .iter() + .map(|e| escape_graphviz(&e.display(expr_arena).to_string())) + .collect_vec() + .join("\\n") +} + +#[recursive::recursive] +fn visualize_plan_rec( + node_key: PhysNodeKey, + phys_sm: &SlotMap, + expr_arena: &Arena, + visited: &mut SecondaryMap, + out: &mut Vec, +) { + if visited.contains_key(node_key) { + return; + } + visited.insert(node_key, ()); + + use std::slice::from_ref; + let (label, inputs) = match &phys_sm[node_key].kind { + PhysNodeKind::InMemorySource { df } => ( + format!( + "in-memory-source\\ncols: {}", + df.get_column_names().join(", ") + ), + &[][..], + ), + PhysNodeKind::Select { + input, + selectors, + extend_original, + } => { + let label = if *extend_original { + "with-columns" + } else { + "select" + }; + ( + format!("{label}\\n{}", fmt_exprs(selectors, expr_arena)), + from_ref(input), + ) + }, + PhysNodeKind::Reduce { input, exprs } => ( + format!("reduce\\n{}", fmt_exprs(exprs, expr_arena)), + from_ref(input), + ), + PhysNodeKind::StreamingSlice { + input, + offset, + length, + } => ( + format!("slice\\noffset: {offset}, length: {length}"), + from_ref(input), + ), + PhysNodeKind::Filter { input, predicate } => ( + format!("filter\\n{}", fmt_exprs(from_ref(predicate), expr_arena)), + from_ref(input), + ), + PhysNodeKind::SimpleProjection { input, columns } => ( + format!("select\\ncols: {}", columns.join(", ")), + from_ref(input), + ), + PhysNodeKind::InMemorySink { input } => ("in-memory-sink".to_string(), from_ref(input)), + PhysNodeKind::InMemoryMap { input, map: _ } => { + ("in-memory-map".to_string(), from_ref(input)) + }, + PhysNodeKind::Map { input, map: _ } => ("map".to_string(), from_ref(input)), + PhysNodeKind::Sort { + input, + by_column, + slice: _, + sort_options: _, + } => ( + format!("sort\\n{}", fmt_exprs(by_column, expr_arena)), + from_ref(input), + ), + PhysNodeKind::OrderedUnion { inputs } => ("ordered-union".to_string(), inputs.as_slice()), + PhysNodeKind::Zip { + inputs, + null_extend, + } => { + let label = if *null_extend { + "zip-null-extend" + } else { + "zip" + }; + (label.to_string(), 
inputs.as_slice()) + }, + PhysNodeKind::Multiplexer { input } => ("multiplexer".to_string(), from_ref(input)), + PhysNodeKind::FileScan { + paths, + file_info, + hive_parts, + output_schema: _, + scan_type, + predicate, + file_options, + } => { + let name = match scan_type { + FileScan::Parquet { .. } => "parquet-source", + FileScan::Csv { .. } => "csv-source", + FileScan::Ipc { .. } => "ipc-source", + FileScan::NDJson { .. } => "ndjson-source", + FileScan::Anonymous { .. } => "anonymous-source", + }; + + let mut out = name.to_string(); + let mut f = EscapeLabel(&mut out); + + { + let paths_display = PathsDisplay(paths.as_ref()); + + write!(f, "\npaths: {}", paths_display).unwrap(); + } + + { + let total_columns = + file_info.schema.len() - usize::from(file_options.row_index.is_some()); + let n_columns = file_options + .with_columns + .as_ref() + .map(|columns| columns.len()); + + if let Some(n) = n_columns { + write!(f, "\nprojection: {}/{total_columns}", n).unwrap(); + } else { + write!(f, "\nprojection: */{total_columns}").unwrap(); + } + } + + if let Some(polars_io::RowIndex { name, offset }) = &file_options.row_index { + write!(f, r#"\nrow index: name: "{}", offset: {}"#, name, offset).unwrap(); + } + + if let Some((offset, len)) = file_options.slice { + write!(f, "\nslice: offset: {}, len: {}", offset, len).unwrap(); + } + + if let Some(predicate) = predicate.as_ref() { + write!(f, "\nfilter: {}", predicate.display(expr_arena)).unwrap(); + } + + if let Some(v) = hive_parts + .as_deref() + .map(|x| x[0].get_statistics().column_stats().len()) + { + write!(f, "\nhive: {} columns", v).unwrap(); + } + + (out, &[][..]) + }, + }; + + out.push(format!( + "{} [label=\"{}\"];", + node_key.data().as_ffi(), + label + )); + for input in inputs { + visualize_plan_rec(*input, phys_sm, expr_arena, visited, out); + out.push(format!( + "{} -> {};", + input.data().as_ffi(), + node_key.data().as_ffi() + )); + } +} + +pub fn visualize_plan( + root: PhysNodeKey, + phys_sm: &SlotMap, + expr_arena: &Arena, +) -> String { + let mut visited: SecondaryMap = SecondaryMap::new(); + let mut out = Vec::with_capacity(phys_sm.len() + 2); + out.push("digraph polars {\nrankdir=\"BT\"".to_string()); + visualize_plan_rec(root, phys_sm, expr_arena, &mut visited, &mut out); + out.push("}".to_string()); + out.join("\n") +} diff --git a/crates/polars-stream/src/physical_plan/lower_expr.rs b/crates/polars-stream/src/physical_plan/lower_expr.rs new file mode 100644 index 000000000000..13a1a309e50b --- /dev/null +++ b/crates/polars-stream/src/physical_plan/lower_expr.rs @@ -0,0 +1,751 @@ +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::Arc; + +use polars_core::frame::DataFrame; +use polars_core::prelude::{Field, InitHashMaps, PlHashMap, PlHashSet}; +use polars_core::schema::Schema; +use polars_error::PolarsResult; +use polars_expr::planner::get_expr_depth_limit; +use polars_expr::state::ExecutionState; +use polars_expr::{create_physical_expr, ExpressionConversionState}; +use polars_plan::plans::expr_ir::{ExprIR, OutputName}; +use polars_plan::plans::{AExpr, LiteralValue}; +use polars_plan::prelude::*; +use polars_utils::arena::{Arena, Node}; +use polars_utils::itertools::Itertools; +use slotmap::SlotMap; + +use super::{PhysNode, PhysNodeKey, PhysNodeKind}; + +type IRNodeKey = Node; + +fn unique_column_name() -> ColumnName { + static COUNTER: AtomicU64 = AtomicU64::new(0); + let idx = COUNTER.fetch_add(1, Ordering::Relaxed); + format!("__POLARS_STMP_{idx}").into() +} + +struct LowerExprContext<'a> { + expr_arena: 
&'a mut Arena, + phys_sm: &'a mut SlotMap, + is_elementwise_cache: PlHashMap, + is_input_independent_cache: PlHashMap, +} + +#[recursive::recursive] +fn is_elementwise_rec( + expr_key: IRNodeKey, + arena: &Arena, + cache: &mut PlHashMap, +) -> bool { + if let Some(ret) = cache.get(&expr_key) { + return *ret; + } + + let ret = match arena.get(expr_key) { + AExpr::Explode(_) => false, + AExpr::Alias(inner, _) => is_elementwise_rec(*inner, arena, cache), + AExpr::Column(_) => true, + AExpr::Literal(lit) => !matches!(lit, LiteralValue::Series(_) | LiteralValue::Range { .. }), + AExpr::BinaryExpr { left, op: _, right } => { + is_elementwise_rec(*left, arena, cache) && is_elementwise_rec(*right, arena, cache) + }, + AExpr::Cast { + expr, + data_type: _, + options: _, + } => is_elementwise_rec(*expr, arena, cache), + AExpr::Sort { .. } | AExpr::SortBy { .. } | AExpr::Gather { .. } => false, + AExpr::Filter { .. } => false, + AExpr::Agg(_) => false, + AExpr::Ternary { + predicate, + truthy, + falsy, + } => { + is_elementwise_rec(*predicate, arena, cache) + && is_elementwise_rec(*truthy, arena, cache) + && is_elementwise_rec(*falsy, arena, cache) + }, + AExpr::AnonymousFunction { + input: _, + function: _, + output_type: _, + options, + } => options.is_elementwise(), + AExpr::Function { + input, + function, + options, + } => match function { + FunctionExpr::AsStruct => input + .iter() + .all(|expr| is_elementwise_rec(expr.node(), arena, cache)), + _ => options.is_elementwise(), + }, + + AExpr::Window { .. } => false, + AExpr::Slice { .. } => false, + AExpr::Len => false, + }; + + cache.insert(expr_key, ret); + ret +} + +fn is_elementwise(expr_key: IRNodeKey, ctx: &mut LowerExprContext) -> bool { + is_elementwise_rec(expr_key, ctx.expr_arena, &mut ctx.is_elementwise_cache) +} + +#[recursive::recursive] +fn is_input_independent_rec( + expr_key: IRNodeKey, + arena: &Arena, + cache: &mut PlHashMap, +) -> bool { + if let Some(ret) = cache.get(&expr_key) { + return *ret; + } + + let ret = match arena.get(expr_key) { + AExpr::Explode(inner) + | AExpr::Alias(inner, _) + | AExpr::Cast { + expr: inner, + data_type: _, + options: _, + } + | AExpr::Sort { + expr: inner, + options: _, + } => is_input_independent_rec(*inner, arena, cache), + AExpr::Column(_) => false, + AExpr::Literal(_) => true, + AExpr::BinaryExpr { left, op: _, right } => { + is_input_independent_rec(*left, arena, cache) + && is_input_independent_rec(*right, arena, cache) + }, + AExpr::Gather { + expr, + idx, + returns_scalar: _, + } => { + is_input_independent_rec(*expr, arena, cache) + && is_input_independent_rec(*idx, arena, cache) + }, + AExpr::SortBy { + expr, + by, + sort_options: _, + } => { + is_input_independent_rec(*expr, arena, cache) + && by + .iter() + .all(|expr| is_input_independent_rec(*expr, arena, cache)) + }, + AExpr::Filter { input, by } => { + is_input_independent_rec(*input, arena, cache) + && is_input_independent_rec(*by, arena, cache) + }, + AExpr::Agg(agg_expr) => match agg_expr.get_input() { + polars_plan::plans::NodeInputs::Leaf => true, + polars_plan::plans::NodeInputs::Single(expr) => { + is_input_independent_rec(expr, arena, cache) + }, + polars_plan::plans::NodeInputs::Many(exprs) => exprs + .iter() + .all(|expr| is_input_independent_rec(*expr, arena, cache)), + }, + AExpr::Ternary { + predicate, + truthy, + falsy, + } => { + is_input_independent_rec(*predicate, arena, cache) + && is_input_independent_rec(*truthy, arena, cache) + && is_input_independent_rec(*falsy, arena, cache) + }, + AExpr::AnonymousFunction { 
+ input, + function: _, + output_type: _, + options: _, + } + | AExpr::Function { + input, + function: _, + options: _, + } => input + .iter() + .all(|expr| is_input_independent_rec(expr.node(), arena, cache)), + AExpr::Window { + function, + partition_by, + order_by, + options: _, + } => { + is_input_independent_rec(*function, arena, cache) + && partition_by + .iter() + .all(|expr| is_input_independent_rec(*expr, arena, cache)) + && order_by + .iter() + .all(|(expr, _options)| is_input_independent_rec(*expr, arena, cache)) + }, + AExpr::Slice { + input, + offset, + length, + } => { + is_input_independent_rec(*input, arena, cache) + && is_input_independent_rec(*offset, arena, cache) + && is_input_independent_rec(*length, arena, cache) + }, + AExpr::Len => false, + }; + + cache.insert(expr_key, ret); + ret +} + +fn is_input_independent(expr_key: IRNodeKey, ctx: &mut LowerExprContext) -> bool { + is_input_independent_rec( + expr_key, + ctx.expr_arena, + &mut ctx.is_input_independent_cache, + ) +} + +fn build_input_independent_node_with_ctx( + exprs: &[ExprIR], + ctx: &mut LowerExprContext, +) -> PolarsResult { + let expr_depth_limit = get_expr_depth_limit()?; + let mut state = ExpressionConversionState::new(false, expr_depth_limit); + let empty = DataFrame::empty(); + let execution_state = ExecutionState::new(); + let columns = exprs + .iter() + .map(|expr| { + let phys_expr = + create_physical_expr(expr, Context::Default, ctx.expr_arena, None, &mut state)?; + + phys_expr.evaluate(&empty, &execution_state) + }) + .try_collect_vec()?; + + let df = Arc::new(DataFrame::new_with_broadcast(columns)?); + Ok(ctx.phys_sm.insert(PhysNode::new( + Arc::new(df.schema()), + PhysNodeKind::InMemorySource { df }, + ))) +} + +fn simplify_input_nodes( + orig_input: PhysNodeKey, + mut input_nodes: PlHashSet, + ctx: &mut LowerExprContext, +) -> PolarsResult> { + // Flatten nested zips (ensures the original input columns only occur once). + if input_nodes.len() > 1 { + let mut flattened_input_nodes = PlHashSet::with_capacity(input_nodes.len()); + for input_node in input_nodes { + if let PhysNodeKind::Zip { + inputs, + null_extend: false, + } = &ctx.phys_sm[input_node].kind + { + flattened_input_nodes.extend(inputs); + ctx.phys_sm.remove(input_node); + } else { + flattened_input_nodes.insert(input_node); + } + } + input_nodes = flattened_input_nodes; + } + + // Merge reduce nodes that directly operate on the original input. + let mut combined_exprs = vec![]; + input_nodes = input_nodes + .into_iter() + .filter(|input_node| { + if let PhysNodeKind::Reduce { + input: inner, + exprs, + } = &ctx.phys_sm[*input_node].kind + { + if *inner == orig_input { + combined_exprs.extend(exprs.iter().cloned()); + ctx.phys_sm.remove(*input_node); + return false; + } + } + true + }) + .collect(); + if !combined_exprs.is_empty() { + let output_schema = schema_for_select(orig_input, &combined_exprs, ctx)?; + let kind = PhysNodeKind::Reduce { + input: orig_input, + exprs: combined_exprs, + }; + let reduce_node_key = ctx.phys_sm.insert(PhysNode::new(output_schema, kind)); + input_nodes.insert(reduce_node_key); + } + + Ok(input_nodes) +} + +fn build_fallback_node_with_ctx( + input: PhysNodeKey, + exprs: &[ExprIR], + ctx: &mut LowerExprContext, +) -> PolarsResult { + // Pre-select only the columns that are needed for this fallback expression. 
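The pre-selection above is driven by collecting the leaf (column) names referenced by the fallback expressions (via `aexpr_to_leaf_names_iter`). A toy expression tree showing that traversal; `Expr` here is an illustrative stand-in, not the real `AExpr`:

// A toy expression tree standing in for `AExpr`, to show how the leaf
// (column) names used by a fallback expression are collected.
enum Expr {
    Column(String),
    Literal(i64),
    BinaryExpr(Box<Expr>, Box<Expr>),
}

fn leaf_names(expr: &Expr, out: &mut Vec<String>) {
    match expr {
        Expr::Column(name) => out.push(name.clone()),
        Expr::Literal(_) => {},
        Expr::BinaryExpr(l, r) => {
            leaf_names(l, out);
            leaf_names(r, out);
        },
    }
}

fn main() {
    // (a + 1) + b  requires only the columns {a, b}.
    let e = Expr::BinaryExpr(
        Box::new(Expr::BinaryExpr(
            Box::new(Expr::Column("a".into())),
            Box::new(Expr::Literal(1)),
        )),
        Box::new(Expr::Column("b".into())),
    );
    let mut cols = Vec::new();
    leaf_names(&e, &mut cols);
    assert_eq!(cols, vec!["a".to_string(), "b".to_string()]);
}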
+ let input_schema = &ctx.phys_sm[input].output_schema; + let select_names: PlHashSet<_> = exprs + .iter() + .flat_map(|expr| polars_plan::utils::aexpr_to_leaf_names_iter(expr.node(), ctx.expr_arena)) + .collect(); + let input_node = if input_schema + .iter_names() + .any(|name| !select_names.contains(name.as_str())) + { + let select_exprs = select_names + .into_iter() + .map(|name| { + ExprIR::new( + ctx.expr_arena.add(AExpr::Column(name.clone())), + OutputName::ColumnLhs(name), + ) + }) + .collect_vec(); + build_select_node_with_ctx(input, &select_exprs, ctx)? + } else { + input + }; + + let output_schema = schema_for_select(input_node, exprs, ctx)?; + let expr_depth_limit = get_expr_depth_limit()?; + let mut conv_state = ExpressionConversionState::new(false, expr_depth_limit); + let phys_exprs = exprs + .iter() + .map(|expr| { + create_physical_expr( + expr, + Context::Default, + ctx.expr_arena, + None, + &mut conv_state, + ) + }) + .try_collect_vec()?; + let map = move |df| { + let exec_state = ExecutionState::new(); + let columns = phys_exprs + .iter() + .map(|phys_expr| phys_expr.evaluate(&df, &exec_state)) + .try_collect()?; + DataFrame::new_with_broadcast(columns) + }; + let kind = PhysNodeKind::InMemoryMap { + input: input_node, + map: Arc::new(map), + }; + Ok(ctx.phys_sm.insert(PhysNode::new(output_schema, kind))) +} + +// In the recursive lowering we don't bother with named expressions at all, so +// we work directly with Nodes. +#[recursive::recursive] +fn lower_exprs_with_ctx( + input: PhysNodeKey, + exprs: &[Node], + ctx: &mut LowerExprContext, +) -> PolarsResult<(PhysNodeKey, Vec)> { + // We have to catch this case separately, in case all the input independent expressions are elementwise. + // TODO: we shouldn't always do this when recursing, e.g. pl.col.a.sum() + 1 will still hit this in the recursion. + if exprs.iter().all(|e| is_input_independent(*e, ctx)) { + let expr_irs = exprs + .iter() + .map(|e| ExprIR::new(*e, OutputName::Alias(unique_column_name()))) + .collect_vec(); + let node = build_input_independent_node_with_ctx(&expr_irs, ctx)?; + let out_exprs = expr_irs + .iter() + .map(|e| ctx.expr_arena.add(AExpr::Column(e.output_name().into()))) + .collect(); + return Ok((node, out_exprs)); + } + + // Fallback expressions that can directly be applied to the original input. + let mut fallback_subset = Vec::new(); + + // Nodes containing the columns used for executing transformed expressions. + let mut input_nodes = PlHashSet::new(); + + // The final transformed expressions that will be selected from the zipped + // together transformed nodes. + let mut transformed_exprs = Vec::with_capacity(exprs.len()); + + for expr in exprs.iter().copied() { + if is_elementwise(expr, ctx) { + if !is_input_independent(expr, ctx) { + input_nodes.insert(input); + } + transformed_exprs.push(expr); + continue; + } + + match ctx.expr_arena.get(expr).clone() { + AExpr::Explode(inner) => { + // While explode is streamable, it is not elementwise, so we + // have to transform it to a select node. 
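A small illustration of why explode cannot be treated as elementwise: its output length depends on the values themselves, so it has to become its own select node rather than being fused into a per-row expression. The `explode` function below is a simplified stand-in operating on plain vectors:

// Explode maps each list to its elements, so the output length depends on the
// data; an elementwise (per-row) kernel could never change the row count.
fn explode(lists: &[Vec<i64>]) -> Vec<i64> {
    lists.iter().flatten().copied().collect()
}

fn main() {
    let input = vec![vec![1, 2], vec![], vec![3, 4, 5]];
    assert_eq!(explode(&input), vec![1, 2, 3, 4, 5]); // 3 rows in, 5 rows out
}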
+ let (trans_input, trans_exprs) = lower_exprs_with_ctx(input, &[inner], ctx)?; + let exploded_name = unique_column_name(); + let trans_inner = ctx.expr_arena.add(AExpr::Explode(trans_exprs[0])); + let explode_expr = ExprIR::new(trans_inner, OutputName::Alias(exploded_name.clone())); + let output_schema = schema_for_select(trans_input, &[explode_expr.clone()], ctx)?; + let node_kind = PhysNodeKind::Select { + input: trans_input, + selectors: vec![explode_expr.clone()], + extend_original: false, + }; + let node_key = ctx.phys_sm.insert(PhysNode::new(output_schema, node_kind)); + input_nodes.insert(node_key); + transformed_exprs.push(ctx.expr_arena.add(AExpr::Column(exploded_name))); + }, + AExpr::Alias(_, _) => unreachable!("alias found in physical plan"), + AExpr::Column(_) => unreachable!("column should always be streamable"), + AExpr::Literal(_) => { + let out_name = unique_column_name(); + let inner_expr = ExprIR::new(expr, OutputName::Alias(out_name.clone())); + input_nodes.insert(build_input_independent_node_with_ctx(&[inner_expr], ctx)?); + transformed_exprs.push(ctx.expr_arena.add(AExpr::Column(out_name))); + }, + AExpr::BinaryExpr { left, op, right } => { + let (trans_input, trans_exprs) = lower_exprs_with_ctx(input, &[left, right], ctx)?; + let bin_expr = AExpr::BinaryExpr { + left: trans_exprs[0], + op, + right: trans_exprs[1], + }; + input_nodes.insert(trans_input); + transformed_exprs.push(ctx.expr_arena.add(bin_expr)); + }, + AExpr::Ternary { + predicate, + truthy, + falsy, + } => { + let (trans_input, trans_exprs) = + lower_exprs_with_ctx(input, &[predicate, truthy, falsy], ctx)?; + let tern_expr = AExpr::Ternary { + predicate: trans_exprs[0], + truthy: trans_exprs[1], + falsy: trans_exprs[2], + }; + input_nodes.insert(trans_input); + transformed_exprs.push(ctx.expr_arena.add(tern_expr)); + }, + AExpr::Cast { + expr: inner, + data_type, + options, + } => { + let (trans_input, trans_exprs) = lower_exprs_with_ctx(input, &[inner], ctx)?; + input_nodes.insert(trans_input); + transformed_exprs.push(ctx.expr_arena.add(AExpr::Cast { + expr: trans_exprs[0], + data_type, + options, + })); + }, + AExpr::Sort { + expr: inner, + options, + } => { + // As we'll refer to the sorted column twice, ensure the inner + // expr is available as a column by selecting first. + let sorted_name = unique_column_name(); + let inner_expr_ir = ExprIR::new(inner, OutputName::Alias(sorted_name.clone())); + let select_node = build_select_node_with_ctx(input, &[inner_expr_ir.clone()], ctx)?; + let col_expr = ctx.expr_arena.add(AExpr::Column(sorted_name.clone())); + let kind = PhysNodeKind::Sort { + input: select_node, + by_column: vec![ExprIR::new(col_expr, OutputName::Alias(sorted_name))], + slice: None, + sort_options: (&options).into(), + }; + let output_schema = ctx.phys_sm[select_node].output_schema.clone(); + let node_key = ctx.phys_sm.insert(PhysNode::new(output_schema, kind)); + input_nodes.insert(node_key); + transformed_exprs.push(col_expr); + }, + AExpr::SortBy { + expr: inner, + by, + sort_options, + } => { + // Select our inputs (if we don't do this we'll waste time sorting irrelevant columns). 
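The sort-by lowering below materializes the key expressions under temporary names, sorts by them, and then drops the keys again with a follow-up select. A DataFrame-free sketch of that shape, with plain vectors standing in for columns and an illustrative `sort_by_keys` helper:

// Sort one "column" by separate key columns, then drop the keys again,
// mirroring the select -> sort -> select sequence built below.
fn sort_by_keys(values: Vec<&str>, keys: Vec<i64>) -> Vec<&str> {
    let mut rows: Vec<(i64, &str)> = keys.into_iter().zip(values).collect();
    rows.sort_by_key(|(k, _)| *k);
    rows.into_iter().map(|(_, v)| v).collect() // the temporary key column is dropped here
}

fn main() {
    assert_eq!(sort_by_keys(vec!["b", "c", "a"], vec![2, 3, 1]), vec!["a", "b", "c"]);
}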
+ let sorted_name = unique_column_name(); + let by_names = by.iter().map(|_| unique_column_name()).collect_vec(); + let all_inner_expr_irs = [(&sorted_name, inner)] + .into_iter() + .chain(by_names.iter().zip(by.iter().copied())) + .map(|(name, inner)| ExprIR::new(inner, OutputName::Alias(name.clone()))) + .collect_vec(); + let select_node = build_select_node_with_ctx(input, &all_inner_expr_irs, ctx)?; + + // Sort the inputs. + let kind = PhysNodeKind::Sort { + input: select_node, + by_column: by_names + .into_iter() + .map(|name| { + ExprIR::new( + ctx.expr_arena.add(AExpr::Column(name.clone())), + OutputName::Alias(name), + ) + }) + .collect(), + slice: None, + sort_options, + }; + let output_schema = ctx.phys_sm[select_node].output_schema.clone(); + let sort_node_key = ctx.phys_sm.insert(PhysNode::new(output_schema, kind)); + + // Drop the by columns. + let sorted_col_expr = ctx.expr_arena.add(AExpr::Column(sorted_name.clone())); + let sorted_col_ir = + ExprIR::new(sorted_col_expr, OutputName::Alias(sorted_name.clone())); + let post_sort_select_node = + build_select_node_with_ctx(sort_node_key, &[sorted_col_ir], ctx)?; + input_nodes.insert(post_sort_select_node); + transformed_exprs.push(sorted_col_expr); + }, + AExpr::Gather { .. } => todo!(), + AExpr::Filter { input: inner, by } => { + // Select our inputs (if we don't do this we'll waste time filtering irrelevant columns). + let out_name = unique_column_name(); + let by_name = unique_column_name(); + let inner_expr_ir = ExprIR::new(inner, OutputName::Alias(out_name.clone())); + let by_expr_ir = ExprIR::new(by, OutputName::Alias(by_name.clone())); + let select_node = + build_select_node_with_ctx(input, &[inner_expr_ir, by_expr_ir], ctx)?; + + // Add a filter node. + let predicate = ExprIR::new( + ctx.expr_arena.add(AExpr::Column(by_name.clone())), + OutputName::Alias(by_name), + ); + let kind = PhysNodeKind::Filter { + input: select_node, + predicate, + }; + let output_schema = ctx.phys_sm[select_node].output_schema.clone(); + let filter_node_key = ctx.phys_sm.insert(PhysNode::new(output_schema, kind)); + input_nodes.insert(filter_node_key); + transformed_exprs.push(ctx.expr_arena.add(AExpr::Column(out_name))); + }, + AExpr::Agg(mut agg) => match agg { + // Change agg mutably so we can share the codepath for all of these. + IRAggExpr::Min { + input: ref mut inner, + .. + } + | IRAggExpr::Max { + input: ref mut inner, + .. + } + | IRAggExpr::Sum(ref mut inner) + | IRAggExpr::Mean(ref mut inner) => { + let (trans_input, trans_exprs) = lower_exprs_with_ctx(input, &[*inner], ctx)?; + *inner = trans_exprs[0]; + + let out_name = unique_column_name(); + let trans_agg_expr = ctx.expr_arena.add(AExpr::Agg(agg)); + let expr_ir = ExprIR::new(trans_agg_expr, OutputName::Alias(out_name.clone())); + let output_schema = schema_for_select(trans_input, &[expr_ir.clone()], ctx)?; + let kind = PhysNodeKind::Reduce { + input: trans_input, + exprs: vec![expr_ir], + }; + let reduce_node_key = ctx.phys_sm.insert(PhysNode::new(output_schema, kind)); + input_nodes.insert(reduce_node_key); + transformed_exprs.push(ctx.expr_arena.add(AExpr::Column(out_name))); + }, + IRAggExpr::Median(_) + | IRAggExpr::NUnique(_) + | IRAggExpr::First(_) + | IRAggExpr::Last(_) + | IRAggExpr::Implode(_) + | IRAggExpr::Quantile { .. 
}
+                | IRAggExpr::Count(_, _)
+                | IRAggExpr::Std(_, _)
+                | IRAggExpr::Var(_, _)
+                | IRAggExpr::AggGroups(_) => {
+                    let out_name = unique_column_name();
+                    fallback_subset.push(ExprIR::new(expr, OutputName::Alias(out_name.clone())));
+                    transformed_exprs.push(ctx.expr_arena.add(AExpr::Column(out_name)));
+                },
+            },
+            AExpr::AnonymousFunction {
+                ..
+            }
+            | AExpr::Function {
+                ..
+            }
+            | AExpr::Len // TODO: this one makes me really sad, make this streaming ASAP.
+            | AExpr::Slice { .. }
+            | AExpr::Window { .. } => {
+                let out_name = unique_column_name();
+                fallback_subset.push(ExprIR::new(expr, OutputName::Alias(out_name.clone())));
+                transformed_exprs.push(ctx.expr_arena.add(AExpr::Column(out_name)));
+            }
+        }
+    }
+
+    if !fallback_subset.is_empty() {
+        input_nodes.insert(build_fallback_node_with_ctx(input, &fallback_subset, ctx)?);
+    }
+
+    // Simplify the input nodes (also ensures the original input only occurs
+    // once in the zip).
+    input_nodes = simplify_input_nodes(input, input_nodes, ctx)?;
+
+    if input_nodes.len() == 1 {
+        // No need for any multiplexing/zipping, can directly execute.
+        return Ok((input_nodes.into_iter().next().unwrap(), transformed_exprs));
+    }
+
+    let zip_inputs = input_nodes.into_iter().collect_vec();
+    let output_schema = zip_inputs
+        .iter()
+        .flat_map(|node| ctx.phys_sm[*node].output_schema.iter_fields())
+        .collect();
+    let zip_kind = PhysNodeKind::Zip {
+        inputs: zip_inputs,
+        null_extend: false,
+    };
+    let zip_node = ctx
+        .phys_sm
+        .insert(PhysNode::new(Arc::new(output_schema), zip_kind));
+
+    Ok((zip_node, transformed_exprs))
+}
+
+/// Computes the schema that selecting the given expressions on the input node
+/// would result in.
+fn schema_for_select(
+    input: PhysNodeKey,
+    exprs: &[ExprIR],
+    ctx: &mut LowerExprContext,
+) -> PolarsResult<Arc<Schema>> {
+    let input_schema = &ctx.phys_sm[input].output_schema;
+    let output_schema: Schema = exprs
+        .iter()
+        .map(|e| {
+            let name = e.output_name();
+            let dtype = ctx.expr_arena.get(e.node()).to_dtype(
+                input_schema,
+                Context::Default,
+                ctx.expr_arena,
+            )?;
+            PolarsResult::Ok(Field::new(name, dtype))
+        })
+        .try_collect()?;
+    Ok(Arc::new(output_schema))
+}
+
+fn build_select_node_with_ctx(
+    input: PhysNodeKey,
+    exprs: &[ExprIR],
+    ctx: &mut LowerExprContext,
+) -> PolarsResult<PhysNodeKey> {
+    if exprs.iter().all(|e| is_input_independent(e.node(), ctx)) {
+        return build_input_independent_node_with_ctx(exprs, ctx);
+    }
+
+    // Are we only selecting simple columns, with the same name?
+    let all_simple_columns: Option<Vec<String>> = exprs
+        .iter()
+        .map(|e| match ctx.expr_arena.get(e.node()) {
+            AExpr::Column(name) if name.as_ref() == e.output_name() => Some(name.to_string()),
+            _ => None,
+        })
+        .collect();
+
+    if let Some(columns) = all_simple_columns {
+        let input_schema = ctx.phys_sm[input].output_schema.clone();
+        if input_schema.len() == columns.len()
+            && input_schema.iter_names().zip(&columns).all(|(l, r)| l == r)
+        {
+            // Input node already has the correct schema, just pass through.
+            return Ok(input);
+        }
+
+        let output_schema = Arc::new(input_schema.select(&columns)?);
+        let node_kind = PhysNodeKind::SimpleProjection { input, columns };
+        return Ok(ctx.phys_sm.insert(PhysNode::new(output_schema, node_kind)));
+    }
+
+    let node_exprs = exprs.iter().map(|e| e.node()).collect_vec();
+    let (transformed_input, transformed_exprs) = lower_exprs_with_ctx(input, &node_exprs, ctx)?;
+    let trans_expr_irs = exprs
+        .iter()
+        .zip(transformed_exprs)
+        .map(|(e, te)| ExprIR::new(te, OutputName::Alias(e.output_name().into())))
+        .collect_vec();
+    let output_schema = schema_for_select(transformed_input, &trans_expr_irs, ctx)?;
+    let node_kind = PhysNodeKind::Select {
+        input: transformed_input,
+        selectors: trans_expr_irs,
+        extend_original: false,
+    };
+    Ok(ctx.phys_sm.insert(PhysNode::new(output_schema, node_kind)))
+}
+
+/// Lowers an input node plus a set of expressions on that input node to an
+/// equivalent (input node, set of expressions) pair, ensuring that the new set
+/// of expressions can run on the streaming engine.
+///
+/// Ensures that if the input node is transformed it has unique column names.
+pub fn lower_exprs(
+    input: PhysNodeKey,
+    exprs: &[ExprIR],
+    expr_arena: &mut Arena<AExpr>,
+    phys_sm: &mut SlotMap<PhysNodeKey, PhysNode>,
+) -> PolarsResult<(PhysNodeKey, Vec<ExprIR>)> {
+    let mut ctx = LowerExprContext {
+        expr_arena,
+        phys_sm,
+        is_elementwise_cache: PlHashMap::new(),
+        is_input_independent_cache: PlHashMap::new(),
+    };
+    let node_exprs = exprs.iter().map(|e| e.node()).collect_vec();
+    let (transformed_input, transformed_exprs) =
+        lower_exprs_with_ctx(input, &node_exprs, &mut ctx)?;
+    let trans_expr_irs = exprs
+        .iter()
+        .zip(transformed_exprs)
+        .map(|(e, te)| ExprIR::new(te, OutputName::Alias(e.output_name().into())))
+        .collect_vec();
+    Ok((transformed_input, trans_expr_irs))
+}
+
+/// Builds a selection node given an input node and the expressions to select for.
+pub fn build_select_node( + input: PhysNodeKey, + exprs: &[ExprIR], + expr_arena: &mut Arena, + phys_sm: &mut SlotMap, +) -> PolarsResult { + let mut ctx = LowerExprContext { + expr_arena, + phys_sm, + is_elementwise_cache: PlHashMap::new(), + is_input_independent_cache: PlHashMap::new(), + }; + build_select_node_with_ctx(input, exprs, &mut ctx) +} diff --git a/crates/polars-stream/src/physical_plan/lower_ir.rs b/crates/polars-stream/src/physical_plan/lower_ir.rs index 75ae7daeb728..6e1a8bc4e056 100644 --- a/crates/polars-stream/src/physical_plan/lower_ir.rs +++ b/crates/polars-stream/src/physical_plan/lower_ir.rs @@ -1,13 +1,16 @@ use std::sync::Arc; +use polars_core::prelude::{InitHashMaps, PlHashMap, PlIndexMap}; +use polars_core::schema::Schema; use polars_error::PolarsResult; -use polars_expr::reduce::can_convert_into_reduction; -use polars_plan::plans::{AExpr, Context, IR}; +use polars_plan::plans::expr_ir::{ExprIR, OutputName}; +use polars_plan::plans::{AExpr, ColumnName, Context, IR}; use polars_plan::prelude::SinkType; use polars_utils::arena::{Arena, Node}; +use polars_utils::itertools::Itertools; use slotmap::SlotMap; -use super::{PhysNode, PhysNodeKey}; +use super::{PhysNode, PhysNodeKey, PhysNodeKind}; fn is_streamable(node: Node, arena: &Arena) -> bool { polars_plan::plans::is_streamable(node, arena, Context::Default) @@ -17,164 +20,192 @@ fn is_streamable(node: Node, arena: &Arena) -> bool { pub fn lower_ir( node: Node, ir_arena: &mut Arena, - expr_arena: &Arena, + expr_arena: &mut Arena, phys_sm: &mut SlotMap, + schema_cache: &mut PlHashMap>, ) -> PolarsResult { let ir_node = ir_arena.get(node); - match ir_node { + let output_schema = IR::schema_with_cache(node, ir_arena, schema_cache); + let node_kind = match ir_node { IR::SimpleProjection { input, columns } => { - let input_ir_node = ir_arena.get(*input); - let input_schema = input_ir_node.schema(ir_arena).into_owned(); let columns = columns.iter_names().map(|s| s.to_string()).collect(); - let input = lower_ir(*input, ir_arena, expr_arena, phys_sm)?; - Ok(phys_sm.insert(PhysNode::SimpleProjection { - input, + let phys_input = lower_ir(*input, ir_arena, expr_arena, phys_sm, schema_cache)?; + PhysNodeKind::SimpleProjection { + input: phys_input, columns, - input_schema, - })) + } }, - // TODO: split partially streamable selections to avoid fallback as much as possible. - IR::Select { - input, - expr, - schema, - .. - } if expr.iter().all(|e| is_streamable(e.node(), expr_arena)) => { + IR::Select { input, expr, .. } => { let selectors = expr.clone(); - let output_schema = schema.clone(); - let input = lower_ir(*input, ir_arena, expr_arena, phys_sm)?; - Ok(phys_sm.insert(PhysNode::Select { - input, - selectors, - output_schema, - extend_original: false, - })) - }, - // TODO: split reductions and streamable selections. E.g. sum(a) + sum(b) should be split - // into Select(a + b) -> Reduce(sum(a), sum(b) - IR::Select { - input, - expr, - schema: output_schema, - .. 
- } if expr - .iter() - .all(|e| can_convert_into_reduction(e.node(), expr_arena)) => - { - let exprs = expr.clone(); - let input_ir_node = ir_arena.get(*input); - let input_schema = input_ir_node.schema(ir_arena).into_owned(); - let output_schema = output_schema.clone(); - let input_node = lower_ir(*input, ir_arena, expr_arena, phys_sm)?; - Ok(phys_sm.insert(PhysNode::Reduce { - input: input_node, - exprs, - input_schema, - output_schema, - })) + let phys_input = lower_ir(*input, ir_arena, expr_arena, phys_sm, schema_cache)?; + return super::lower_expr::build_select_node( + phys_input, &selectors, expr_arena, phys_sm, + ); }, - // TODO: split partially streamable selections to avoid fallback as much as possible. - IR::HStack { - input, - exprs, - schema, - .. - } if exprs.iter().all(|e| is_streamable(e.node(), expr_arena)) => { + IR::HStack { input, exprs, .. } + if exprs.iter().all(|e| is_streamable(e.node(), expr_arena)) => + { + // FIXME: constant literal columns should be broadcasted with hstack. let selectors = exprs.clone(); - let output_schema = schema.clone(); - let input = lower_ir(*input, ir_arena, expr_arena, phys_sm)?; - Ok(phys_sm.insert(PhysNode::Select { - input, + let phys_input = lower_ir(*input, ir_arena, expr_arena, phys_sm, schema_cache)?; + PhysNodeKind::Select { + input: phys_input, selectors, - output_schema, extend_original: true, - })) + } + }, + + IR::HStack { input, exprs, .. } => { + // We already handled the all-streamable case above, so things get more complicated. + // For simplicity we just do a normal select with all the original columns prepended. + // + // FIXME: constant literal columns should be broadcasted with hstack. + let exprs = exprs.clone(); + let phys_input = lower_ir(*input, ir_arena, expr_arena, phys_sm, schema_cache)?; + let input_schema = &phys_sm[phys_input].output_schema; + let mut selectors = PlIndexMap::with_capacity(input_schema.len() + exprs.len()); + for name in input_schema.iter_names() { + let col_name: Arc = name.as_str().into(); + let col_expr = expr_arena.add(AExpr::Column(col_name.clone())); + selectors.insert( + name.clone(), + ExprIR::new(col_expr, OutputName::ColumnLhs(col_name)), + ); + } + for expr in exprs { + selectors.insert(expr.output_name().into(), expr); + } + let selectors = selectors.into_values().collect_vec(); + return super::lower_expr::build_select_node( + phys_input, &selectors, expr_arena, phys_sm, + ); }, IR::Slice { input, offset, len } => { if *offset >= 0 { let offset = *offset as usize; let length = *len as usize; - let input = lower_ir(*input, ir_arena, expr_arena, phys_sm)?; - Ok(phys_sm.insert(PhysNode::StreamingSlice { - input, + let phys_input = lower_ir(*input, ir_arena, expr_arena, phys_sm, schema_cache)?; + PhysNodeKind::StreamingSlice { + input: phys_input, offset, length, - })) + } } else { todo!() } }, - IR::Filter { input, predicate } if is_streamable(predicate.node(), expr_arena) => { + IR::Filter { input, predicate } => { let predicate = predicate.clone(); - let input = lower_ir(*input, ir_arena, expr_arena, phys_sm)?; - Ok(phys_sm.insert(PhysNode::Filter { input, predicate })) + let phys_input = lower_ir(*input, ir_arena, expr_arena, phys_sm, schema_cache)?; + let cols_and_predicate = output_schema + .iter_names() + .map(|name| { + let name: ColumnName = name.as_str().into(); + ExprIR::new( + expr_arena.add(AExpr::Column(name.clone())), + OutputName::ColumnLhs(name), + ) + }) + .chain([predicate]) + .collect_vec(); + let (trans_input, mut trans_cols_and_predicate) = 
super::lower_expr::lower_exprs( + phys_input, + &cols_and_predicate, + expr_arena, + phys_sm, + )?; + + let filter_schema = phys_sm[trans_input].output_schema.clone(); + let filter = PhysNodeKind::Filter { + input: trans_input, + predicate: trans_cols_and_predicate.last().unwrap().clone(), + }; + + let post_filter = phys_sm.insert(PhysNode::new(filter_schema, filter)); + trans_cols_and_predicate.pop(); // Remove predicate. + return super::lower_expr::build_select_node( + post_filter, + &trans_cols_and_predicate, + expr_arena, + phys_sm, + ); }, IR::DataFrameScan { df, - output_schema, + output_schema: projection, filter, - schema: input_schema, + schema, .. } => { - if let Some(filter) = filter { - if !is_streamable(filter.node(), expr_arena) { - todo!() - } - } - - let mut phys_node = phys_sm.insert(PhysNode::InMemorySource { df: df.clone() }); + let mut schema = schema.clone(); // This is initially the schema of df, but can change with the projection. + let mut node_kind = PhysNodeKind::InMemorySource { df: df.clone() }; - if let Some(schema) = output_schema { - phys_node = phys_sm.insert(PhysNode::SimpleProjection { - input: phys_node, - input_schema: input_schema.clone(), - columns: schema.iter_names().map(|s| s.to_string()).collect(), - }) + // Do we need to apply a projection? + if let Some(projection_schema) = projection { + if projection_schema.len() != schema.len() + || projection_schema + .iter_names() + .zip(schema.iter_names()) + .any(|(l, r)| l != r) + { + let phys_input = phys_sm.insert(PhysNode::new(schema, node_kind)); + node_kind = PhysNodeKind::SimpleProjection { + input: phys_input, + columns: projection_schema + .iter_names() + .map(|s| s.to_string()) + .collect(), + }; + schema = projection_schema.clone(); + } } if let Some(predicate) = filter.clone() { - phys_node = phys_sm.insert(PhysNode::Filter { - input: phys_node, + if !is_streamable(predicate.node(), expr_arena) { + todo!() + } + + let phys_input = phys_sm.insert(PhysNode::new(schema, node_kind)); + node_kind = PhysNodeKind::Filter { + input: phys_input, predicate, - }) + }; } - Ok(phys_node) + node_kind }, IR::Sink { input, payload } => { if *payload == SinkType::Memory { - let schema = ir_node.schema(ir_arena).into_owned(); - let input = lower_ir(*input, ir_arena, expr_arena, phys_sm)?; - return Ok(phys_sm.insert(PhysNode::InMemorySink { input, schema })); + let phys_input = lower_ir(*input, ir_arena, expr_arena, phys_sm, schema_cache)?; + PhysNodeKind::InMemorySink { input: phys_input } + } else { + todo!() } - - todo!() }, IR::MapFunction { input, function } => { - let input_schema = ir_arena.get(*input).schema(ir_arena).into_owned(); let function = function.clone(); - let input = lower_ir(*input, ir_arena, expr_arena, phys_sm)?; + let phys_input = lower_ir(*input, ir_arena, expr_arena, phys_sm, schema_cache)?; - let phys_node = if function.is_streamable() { + if function.is_streamable() { let map = Arc::new(move |df| function.evaluate(df)); - PhysNode::Map { input, map } + PhysNodeKind::Map { + input: phys_input, + map, + } } else { let map = Arc::new(move |df| function.evaluate(df)); - PhysNode::InMemoryMap { - input, - input_schema, + PhysNodeKind::InMemoryMap { + input: phys_input, map, } - }; - - Ok(phys_sm.insert(phys_node)) + } }, IR::Sort { @@ -182,16 +213,11 @@ pub fn lower_ir( by_column, slice, sort_options, - } => { - let input_schema = ir_arena.get(*input).schema(ir_arena).into_owned(); - let phys_node = PhysNode::Sort { - input_schema, - by_column: by_column.clone(), - slice: *slice, - 
sort_options: sort_options.clone(), - input: lower_ir(*input, ir_arena, expr_arena, phys_sm)?, - }; - Ok(phys_sm.insert(phys_node)) + } => PhysNodeKind::Sort { + by_column: by_column.clone(), + slice: *slice, + sort_options: sort_options.clone(), + input: lower_ir(*input, ir_arena, expr_arena, phys_sm, schema_cache)?, }, IR::Union { inputs, options } => { @@ -202,9 +228,9 @@ pub fn lower_ir( let inputs = inputs .clone() // Needed to borrow ir_arena mutably. .into_iter() - .map(|input| lower_ir(input, ir_arena, expr_arena, phys_sm)) + .map(|input| lower_ir(input, ir_arena, expr_arena, phys_sm, schema_cache)) .collect::>()?; - Ok(phys_sm.insert(PhysNode::OrderedUnion { inputs })) + PhysNodeKind::OrderedUnion { inputs } }, IR::HConcat { @@ -212,26 +238,44 @@ pub fn lower_ir( schema: _, options: _, } => { - let input_schemas = inputs - .iter() - .map(|input| { - let input_ir_node = ir_arena.get(*input); - input_ir_node.schema(ir_arena).into_owned() - }) - .collect(); - let inputs = inputs .clone() // Needed to borrow ir_arena mutably. .into_iter() - .map(|input| lower_ir(input, ir_arena, expr_arena, phys_sm)) + .map(|input| lower_ir(input, ir_arena, expr_arena, phys_sm, schema_cache)) .collect::>()?; - Ok(phys_sm.insert(PhysNode::Zip { + PhysNodeKind::Zip { inputs, - input_schemas, null_extend: true, - })) + } + }, + + v @ IR::Scan { .. } => { + let IR::Scan { + paths, + file_info, + hive_parts, + output_schema, + scan_type, + predicate, + file_options, + } = v.clone() + else { + unreachable!(); + }; + + PhysNodeKind::FileScan { + paths, + file_info, + hive_parts, + output_schema, + scan_type, + predicate, + file_options, + } }, _ => todo!(), - } + }; + + Ok(phys_sm.insert(PhysNode::new(output_schema, node_kind))) } diff --git a/crates/polars-stream/src/physical_plan/mod.rs b/crates/polars-stream/src/physical_plan/mod.rs index dbc48a82077e..fd59a9ffc8d2 100644 --- a/crates/polars-stream/src/physical_plan/mod.rs +++ b/crates/polars-stream/src/physical_plan/mod.rs @@ -1,15 +1,23 @@ +use std::path::PathBuf; use std::sync::Arc; use polars_core::frame::DataFrame; -use polars_core::prelude::SortMultipleOptions; -use polars_core::schema::Schema; -use polars_plan::plans::DataFrameUdf; +use polars_core::prelude::{PlHashMap, SortMultipleOptions}; +use polars_core::schema::{Schema, SchemaRef}; +use polars_error::PolarsResult; +use polars_plan::plans::hive::HivePartitions; +use polars_plan::plans::{AExpr, DataFrameUdf, FileInfo, FileScan, IR}; use polars_plan::prelude::expr_ir::ExprIR; +mod fmt; +mod lower_expr; mod lower_ir; mod to_graph; -pub use lower_ir::lower_ir; +pub use fmt::visualize_plan; +use polars_plan::prelude::FileScanOptions; +use polars_utils::arena::{Arena, Node}; +use slotmap::{Key, SecondaryMap, SlotMap}; pub use to_graph::physical_plan_to_graph; slotmap::new_key_type! { @@ -22,7 +30,22 @@ slotmap::new_key_type! { /// A physical plan is created when the `IR` is translated to a directed /// acyclic graph of operations that can run on the streaming engine. 
 #[derive(Clone, Debug)]
-pub enum PhysNode {
+pub struct PhysNode {
+    output_schema: Arc<Schema>,
+    kind: PhysNodeKind,
+}
+
+impl PhysNode {
+    pub fn new(output_schema: Arc<Schema>, kind: PhysNodeKind) -> Self {
+        Self {
+            output_schema,
+            kind,
+        }
+    }
+}
+
+#[derive(Clone, Debug)]
+pub enum PhysNodeKind {
     InMemorySource {
         df: Arc<DataFrame>,
     },
@@ -31,14 +54,11 @@ pub enum PhysNode {
         input: PhysNodeKey,
         selectors: Vec<ExprIR>,
         extend_original: bool,
-        output_schema: Arc<Schema>,
     },
 
     Reduce {
         input: PhysNodeKey,
         exprs: Vec<ExprIR>,
-        input_schema: Arc<Schema>,
-        output_schema: Arc<Schema>,
     },
 
     StreamingSlice {
@@ -54,18 +74,15 @@ pub enum PhysNode {
 
     SimpleProjection {
         input: PhysNodeKey,
-        input_schema: Arc<Schema>,
         columns: Vec<String>,
     },
 
     InMemorySink {
         input: PhysNodeKey,
-        schema: Arc<Schema>,
     },
 
     InMemoryMap {
        input: PhysNodeKey,
-        input_schema: Arc<Schema>,
         map: Arc<dyn DataFrameUdf>,
     },
 
@@ -76,7 +93,6 @@ pub enum PhysNode {
 
     Sort {
         input: PhysNodeKey,
-        input_schema: Arc<Schema>, // TODO: remove when not using fallback impl.
         by_column: Vec<ExprIR>,
         slice: Option<(i64, usize)>,
         sort_options: SortMultipleOptions,
@@ -88,7 +104,6 @@ pub enum PhysNode {
 
     Zip {
         inputs: Vec<PhysNodeKey>,
-        input_schemas: Vec<Arc<Schema>>,
         /// If true shorter inputs are extended with nulls to the longest input,
         /// if false all inputs must be the same length, or have length 1 in
         /// which case they are broadcast.
@@ -99,4 +114,78 @@ pub enum PhysNode {
     Multiplexer {
         input: PhysNodeKey,
     },
+
+    FileScan {
+        paths: Arc<Vec<PathBuf>>,
+        file_info: FileInfo,
+        hive_parts: Option<Arc<Vec<HivePartitions>>>,
+        predicate: Option<ExprIR>,
+        output_schema: Option<SchemaRef>,
+        scan_type: FileScan,
+        file_options: FileScanOptions,
+    },
+}
+
+#[recursive::recursive]
+fn insert_multiplexers(
+    node: PhysNodeKey,
+    phys_sm: &mut SlotMap<PhysNodeKey, PhysNode>,
+    referenced: &mut SecondaryMap<PhysNodeKey, ()>,
+) {
+    let seen_before = referenced.insert(node, ()).is_some();
+    if seen_before && !matches!(phys_sm[node].kind, PhysNodeKind::Multiplexer { .. }) {
+        // This node is referenced at least twice. We first set the input key to
+        // null and then update it to avoid a double-mutable-borrow issue.
+        let input_schema = phys_sm[node].output_schema.clone();
+        let orig_input_node = core::mem::replace(
+            &mut phys_sm[node],
+            PhysNode::new(
+                input_schema,
+                PhysNodeKind::Multiplexer {
+                    input: PhysNodeKey::null(),
+                },
+            ),
+        );
+        let orig_input_key = phys_sm.insert(orig_input_node);
+        phys_sm[node].kind = PhysNodeKind::Multiplexer {
+            input: orig_input_key,
+        };
+    }
+
+    if !seen_before {
+        match &phys_sm[node].kind {
+            PhysNodeKind::InMemorySource { .. } | PhysNodeKind::FileScan { .. } => {},
+            PhysNodeKind::Select { input, .. }
+            | PhysNodeKind::Reduce { input, .. }
+            | PhysNodeKind::StreamingSlice { input, .. }
+            | PhysNodeKind::Filter { input, .. }
+            | PhysNodeKind::SimpleProjection { input, .. }
+            | PhysNodeKind::InMemorySink { input }
+            | PhysNodeKind::InMemoryMap { input, .. }
+            | PhysNodeKind::Map { input, .. }
+            | PhysNodeKind::Sort { input, .. }
+            | PhysNodeKind::Multiplexer { input } => {
+                insert_multiplexers(*input, phys_sm, referenced);
+            },
+
+            PhysNodeKind::OrderedUnion { inputs } | PhysNodeKind::Zip { inputs, .. } => {
+                for input in inputs.clone() {
+                    insert_multiplexers(input, phys_sm, referenced);
+                }
+            },
+        }
+    }
+}
+
+pub fn build_physical_plan(
+    root: Node,
+    ir_arena: &mut Arena<IR>,
+    expr_arena: &mut Arena<AExpr>,
+    phys_sm: &mut SlotMap<PhysNodeKey, PhysNode>,
+    schema_cache: &mut PlHashMap<Node, Arc<Schema>>,
+) -> PolarsResult<PhysNodeKey> {
+    let phys_root = lower_ir::lower_ir(root, ir_arena, expr_arena, phys_sm, schema_cache)?;
+    let mut referenced = SecondaryMap::with_capacity(phys_sm.capacity());
+    insert_multiplexers(phys_root, phys_sm, &mut referenced);
+    Ok(phys_root)
 }
diff --git a/crates/polars-stream/src/physical_plan/to_graph.rs b/crates/polars-stream/src/physical_plan/to_graph.rs
index 2e4efd9410bc..44e32e6fc348 100644
--- a/crates/polars-stream/src/physical_plan/to_graph.rs
+++ b/crates/polars-stream/src/physical_plan/to_graph.rs
@@ -6,14 +6,16 @@ use polars_expr::planner::{create_physical_expr, get_expr_depth_limit, ExpressionConversionState};
 use polars_expr::reduce::into_reduction;
 use polars_expr::state::ExecutionState;
 use polars_mem_engine::create_physical_plan;
+use polars_plan::global::_set_n_rows_for_scan;
 use polars_plan::plans::expr_ir::ExprIR;
 use polars_plan::plans::{AExpr, ArenaExprIter, Context, IR};
 use polars_plan::prelude::FunctionFlags;
 use polars_utils::arena::{Arena, Node};
+use polars_utils::itertools::Itertools;
 use recursive::recursive;
 use slotmap::{SecondaryMap, SlotMap};
 
-use super::{PhysNode, PhysNodeKey};
+use super::{PhysNode, PhysNodeKey, PhysNodeKind};
 use crate::expression::StreamExpr;
 use crate::graph::{Graph, GraphNodeKey};
 use crate::nodes;
@@ -52,6 +54,7 @@ struct GraphConversionContext<'a> {
 }
 
 pub fn physical_plan_to_graph(
+    root: PhysNodeKey,
     phys_sm: &SlotMap<PhysNodeKey, PhysNode>,
     expr_arena: &Arena<AExpr>,
 ) -> PolarsResult<(Graph, SecondaryMap<PhysNodeKey, GraphNodeKey>)> {
@@ -64,9 +67,7 @@ pub fn physical_plan_to_graph(
         expr_conversion_state: ExpressionConversionState::new(false, expr_depth_limit),
     };
 
-    for key in phys_sm.keys() {
-        to_graph_rec(key, &mut ctx)?;
-    }
+    to_graph_rec(root, &mut ctx)?;
 
     Ok((ctx.graph, ctx.phys_to_graph))
 }
@@ -81,8 +82,9 @@ fn to_graph_rec<'a>(
         return Ok(*graph_key);
     }
 
-    use PhysNode::*;
-    let graph_key = match &ctx.phys_sm[phys_node_key] {
+    use PhysNodeKind::*;
+    let node = &ctx.phys_sm[phys_node_key];
+    let graph_key = match &node.kind {
         InMemorySource { df } => ctx.graph.add_node(
             nodes::in_memory_source::InMemorySourceNode::new(df.clone()),
             [],
@@ -112,7 +114,6 @@ fn to_graph_rec<'a>(
         Select {
             selectors,
             input,
-            output_schema,
             extend_original,
         } => {
             let phys_selectors = selectors
@@ -123,27 +124,22 @@ fn to_graph_rec<'a>(
             ctx.graph.add_node(
                 nodes::select::SelectNode::new(
                     phys_selectors,
-                    output_schema.clone(),
+                    node.output_schema.clone(),
                     *extend_original,
                 ),
                 [input_key],
             )
         },
-        Reduce {
-            input,
-            exprs,
-            input_schema,
-            output_schema,
-        } => {
+        Reduce { input, exprs } => {
             let input_key = to_graph_rec(*input, ctx)?;
+            let input_schema = &ctx.phys_sm[*input].output_schema;
+
             let mut reductions = Vec::with_capacity(exprs.len());
             let mut inputs = Vec::with_capacity(reductions.len());
             for e in exprs {
                 let (red, input_node) =
-                    into_reduction(e.node(), ctx.expr_arena, input_schema.as_ref())?
- .expect("invariant"); + into_reduction(e.node(), ctx.expr_arena, input_schema)?.expect("invariant"); reductions.push(red); let input_phys = @@ -153,41 +149,33 @@ fn to_graph_rec<'a>( } ctx.graph.add_node( - nodes::reduce::ReduceNode::new(inputs, reductions, output_schema.clone()), + nodes::reduce::ReduceNode::new(inputs, reductions, node.output_schema.clone()), [input_key], ) }, - SimpleProjection { - input, - columns, - input_schema, - } => { + SimpleProjection { input, columns } => { + let input_schema = ctx.phys_sm[*input].output_schema.clone(); let input_key = to_graph_rec(*input, ctx)?; ctx.graph.add_node( - nodes::simple_projection::SimpleProjectionNode::new( - columns.clone(), - input_schema.clone(), - ), + nodes::simple_projection::SimpleProjectionNode::new(columns.clone(), input_schema), [input_key], ) }, - InMemorySink { input, schema } => { + InMemorySink { input } => { + let input_schema = ctx.phys_sm[*input].output_schema.clone(); let input_key = to_graph_rec(*input, ctx)?; ctx.graph.add_node( - nodes::in_memory_sink::InMemorySinkNode::new(schema.clone()), + nodes::in_memory_sink::InMemorySinkNode::new(input_schema), [input_key], ) }, - InMemoryMap { - input, - input_schema, - map, - } => { + InMemoryMap { input, map } => { + let input_schema = ctx.phys_sm[*input].output_schema.clone(); let input_key = to_graph_rec(*input, ctx)?; ctx.graph.add_node( - nodes::in_memory_map::InMemoryMapNode::new(input_schema.clone(), map.clone()), + nodes::in_memory_map::InMemoryMapNode::new(input_schema, map.clone()), [input_key], ) }, @@ -200,11 +188,11 @@ fn to_graph_rec<'a>( Sort { input, - input_schema, by_column, slice, sort_options, } => { + let input_schema = ctx.phys_sm[*input].output_schema.clone(); let lmdf = Arc::new(LateMaterializedDataFrame::default()); let mut lp_arena = Arena::default(); let df_node = lp_arena.add(lmdf.clone().as_ir_node(input_schema.clone())); @@ -223,7 +211,7 @@ fn to_graph_rec<'a>( let input_key = to_graph_rec(*input, ctx)?; ctx.graph.add_node( nodes::in_memory_map::InMemoryMapNode::new( - input_schema.clone(), + input_schema, Arc::new(move |df| { lmdf.set_materialized_dataframe(df); let mut state = ExecutionState::new(); @@ -245,15 +233,18 @@ fn to_graph_rec<'a>( Zip { inputs, - input_schemas, null_extend, } => { + let input_schemas = inputs + .iter() + .map(|i| ctx.phys_sm[*i].output_schema.clone()) + .collect_vec(); let input_keys = inputs .iter() .map(|i| to_graph_rec(*i, ctx)) - .collect::, _>>()?; + .try_collect_vec()?; ctx.graph.add_node( - nodes::zip::ZipNode::new(*null_extend, input_schemas.clone()), + nodes::zip::ZipNode::new(*null_extend, input_schemas), input_keys, ) }, @@ -263,6 +254,69 @@ fn to_graph_rec<'a>( ctx.graph .add_node(nodes::multiplexer::MultiplexerNode::new(), [input_key]) }, + + v @ FileScan { .. 
} => { + let FileScan { + paths, + file_info, + hive_parts, + output_schema, + scan_type, + predicate, + mut file_options, + } = v.clone() + else { + unreachable!() + }; + + file_options.slice = if let Some((offset, len)) = file_options.slice { + Some((offset, _set_n_rows_for_scan(Some(len)).unwrap())) + } else { + _set_n_rows_for_scan(None).map(|x| (0, x)) + }; + + let predicate = predicate + .map(|pred| { + create_physical_expr( + &pred, + Context::Default, + ctx.expr_arena, + output_schema.as_ref(), + &mut ctx.expr_conversion_state, + ) + }) + .map_or(Ok(None), |v| v.map(Some))?; + + { + use polars_plan::prelude::FileScan; + + match scan_type { + FileScan::Parquet { + options, + cloud_options, + metadata: _, + } => { + if std::env::var("POLARS_DISABLE_PARQUET_SOURCE").as_deref() != Ok("1") { + ctx.graph.add_node( + nodes::parquet_source::ParquetSourceNode::new( + paths, + file_info, + hive_parts, + predicate, + options, + cloud_options, + file_options, + ), + [], + ) + } else { + todo!() + } + }, + _ => todo!(), + } + } + }, }; ctx.phys_to_graph.insert(phys_node_key, graph_key); diff --git a/crates/polars-stream/src/skeleton.rs b/crates/polars-stream/src/skeleton.rs index 64fcdc4d5c5e..435e12d39ef5 100644 --- a/crates/polars-stream/src/skeleton.rs +++ b/crates/polars-stream/src/skeleton.rs @@ -15,13 +15,23 @@ fn is_streamable(node: Node, arena: &Arena) -> bool { pub fn run_query( node: Node, mut ir_arena: Arena, - expr_arena: &Arena, + expr_arena: &mut Arena, ) -> PolarsResult { let mut phys_sm = SlotMap::with_capacity_and_key(ir_arena.len()); - - let root = crate::physical_plan::lower_ir(node, &mut ir_arena, expr_arena, &mut phys_sm)?; + let mut schema_cache = PlHashMap::with_capacity(ir_arena.len()); + let root = crate::physical_plan::build_physical_plan( + node, + &mut ir_arena, + expr_arena, + &mut phys_sm, + &mut schema_cache, + )?; + if let Ok(visual_path) = std::env::var("POLARS_VISUALIZE_PHYSICAL_PLAN") { + let visualization = crate::physical_plan::visualize_plan(root, &phys_sm, expr_arena); + std::fs::write(visual_path, visualization).unwrap(); + } let (mut graph, phys_to_graph) = - crate::physical_plan::physical_plan_to_graph(&phys_sm, expr_arena)?; + crate::physical_plan::physical_plan_to_graph(root, &phys_sm, expr_arena)?; let mut results = crate::execute::execute_graph(&mut graph)?; Ok(results.remove(phys_to_graph[root]).unwrap()) } diff --git a/crates/polars-stream/src/utils/mod.rs b/crates/polars-stream/src/utils/mod.rs index 018b893ea992..f8d0d74ff027 100644 --- a/crates/polars-stream/src/utils/mod.rs +++ b/crates/polars-stream/src/utils/mod.rs @@ -1,3 +1,5 @@ pub mod in_memory_linearize; pub mod late_materialized_df; pub mod linearizer; +pub mod notify_channel; +pub mod task_handles_ext; diff --git a/crates/polars-stream/src/utils/notify_channel.rs b/crates/polars-stream/src/utils/notify_channel.rs new file mode 100644 index 000000000000..5aaef03ddc61 --- /dev/null +++ b/crates/polars-stream/src/utils/notify_channel.rs @@ -0,0 +1,56 @@ +use tokio::sync::mpsc::error::TrySendError; +use tokio::sync::mpsc::{channel, Receiver, Sender}; + +/// Receiver that calls `notify()` before `recv()` +pub struct NotifyReceiver { + receiver: Receiver, + /// We use a channel for notify because it lets the sender know when the receiver has been + /// dropped. 
+    notify: Sender<()>,
+}
+
+impl<T> NotifyReceiver<T> {
+    pub async fn recv(&mut self) -> Option<T> {
+        match self.notify.try_send(()) {
+            Err(TrySendError::Closed(_)) => None,
+            Ok(_) => self.receiver.recv().await,
+            v @ Err(TrySendError::Full(_)) => {
+                v.unwrap();
+                unreachable!();
+            },
+        }
+    }
+}
+
+/// The notify allows us to make the producer only produce values when requested. Otherwise it would
+/// produce a new value as soon as the previous value was consumed (as there would be channel
+/// capacity).
+pub fn notify_channel<T>() -> (Sender<T>, Receiver<()>, NotifyReceiver<T>) {
+    let (tx, rx) = channel::<T>(1);
+    let (notify_tx, notify_rx) = channel(1);
+
+    (
+        tx,
+        notify_rx,
+        NotifyReceiver {
+            receiver: rx,
+            notify: notify_tx,
+        },
+    )
+}
+
+mod tests {
+
+    #[test]
+    fn test_notify_channel() {
+        use futures::FutureExt;
+
+        use super::notify_channel;
+        let (tx, mut notify, mut rx) = notify_channel();
+        assert!(notify.recv().now_or_never().is_none());
+        assert!(rx.recv().now_or_never().is_none());
+        assert_eq!(notify.recv().now_or_never().unwrap(), Some(()));
+        assert!(tx.try_send(()).is_ok());
+        assert!(rx.recv().now_or_never().is_some());
+    }
+}
diff --git a/crates/polars-stream/src/utils/task_handles_ext.rs b/crates/polars-stream/src/utils/task_handles_ext.rs
new file mode 100644
index 000000000000..edeca1558e80
--- /dev/null
+++ b/crates/polars-stream/src/utils/task_handles_ext.rs
@@ -0,0 +1,20 @@
+use std::future::Future;
+use std::pin::Pin;
+use std::task::{Context, Poll};
+
+/// Calls [`tokio::task::JoinHandle::abort`] on the join handle when dropped.
+pub struct AbortOnDropHandle<T>(pub tokio::task::JoinHandle<T>);
+
+impl<T> Future for AbortOnDropHandle<T> {
+    type Output = Result<T, tokio::task::JoinError>;
+
+    fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+        Pin::new(&mut self.0).poll(cx)
+    }
+}
+
+impl<T> Drop for AbortOnDropHandle<T> {
+    fn drop(&mut self) {
+        self.0.abort();
+    }
+}
diff --git a/crates/polars-time/Cargo.toml b/crates/polars-time/Cargo.toml
index 9fa609614c59..a716974f0e76 100644
--- a/crates/polars-time/Cargo.toml
+++ b/crates/polars-time/Cargo.toml
@@ -34,7 +34,7 @@ dtype-datetime = ["polars-core/dtype-datetime", "temporal"]
 dtype-time = ["polars-core/dtype-time", "temporal"]
 dtype-duration = ["polars-core/dtype-duration", "temporal"]
 month_start = []
-month_end = []
+month_end = ["month_start"]
 offset_by = []
 rolling_window = ["polars-core/rolling_window"]
 rolling_window_by = ["polars-core/rolling_window_by", "dtype-duration"]
diff --git a/crates/polars-time/src/round.rs b/crates/polars-time/src/round.rs
index 4bb6f2a3386f..7fd48a407f51 100644
--- a/crates/polars-time/src/round.rs
+++ b/crates/polars-time/src/round.rs
@@ -5,6 +5,12 @@ use polars_core::prelude::*;
 use polars_utils::cache::FastFixedCache;
 
 use crate::prelude::*;
+use crate::truncate::fast_truncate;
+
+#[inline(always)]
+fn fast_round(t: i64, every: i64) -> i64 {
+    fast_truncate(t + every / 2, every)
+}
 
 pub trait PolarsRound {
     fn round(&self, every: &StringChunked, tz: Option<&Tz>) -> PolarsResult<Self>
@@ -35,11 +41,7 @@ impl PolarsRound for DatetimeChunked {
                 TimeUnit::Nanoseconds => every_parsed.duration_ns(),
             };
             return Ok(self
-                .apply_values(|t| {
-                    // Round half-way values away from zero
-                    let half_away = t.signum() * every / 2;
-                    t + half_away - (t + half_away) % every
-                })
+                .apply_values(|t| fast_round(t, every))
                 .into_datetime(self.time_unit(), time_zone.clone()));
         } else {
             let w = Window::new(every_parsed, every_parsed, offset);
diff --git a/crates/polars-time/src/truncate.rs b/crates/polars-time/src/truncate.rs
index 
991ce50b547a..d3c74420252f 100644 --- a/crates/polars-time/src/truncate.rs +++ b/crates/polars-time/src/truncate.rs @@ -12,6 +12,12 @@ pub trait PolarsTruncate { Self: Sized; } +#[inline(always)] +pub(crate) fn fast_truncate(t: i64, every: i64) -> i64 { + let remainder = t % every; + t - (remainder + every * (remainder < 0) as i64) +} + impl PolarsTruncate for DatetimeChunked { fn truncate(&self, tz: Option<&Tz>, every: &StringChunked) -> PolarsResult { let time_zone = self.time_zone(); @@ -35,10 +41,7 @@ impl PolarsTruncate for DatetimeChunked { TimeUnit::Nanoseconds => every_parsed.duration_ns(), }; return Ok(self - .apply_values(|t| { - let remainder = t % every; - t - (remainder + every * (remainder < 0) as i64) - }) + .apply_values(|t| fast_truncate(t, every)) .into_datetime(self.time_unit(), time_zone.clone())); } else { let w = Window::new(every_parsed, every_parsed, offset); diff --git a/crates/polars-time/src/upsample.rs b/crates/polars-time/src/upsample.rs index 692f1a35744c..235ec383fbc8 100644 --- a/crates/polars-time/src/upsample.rs +++ b/crates/polars-time/src/upsample.rs @@ -121,7 +121,6 @@ fn upsample_impl( stable: bool, ) -> PolarsResult { let s = source.column(index_column)?; - s.ensure_sorted_arg("upsample")?; let time_type = s.dtype(); if matches!(time_type, DataType::Date) { let mut df = source.clone(); @@ -184,6 +183,7 @@ fn upsample_single_impl( index_column: &Series, every: Duration, ) -> PolarsResult { + index_column.ensure_sorted_arg("upsample")?; let index_col_name = index_column.name(); use DataType::*; diff --git a/crates/polars-time/src/windows/group_by.rs b/crates/polars-time/src/windows/group_by.rs index 380a92180322..9ba3a2d3dbc2 100644 --- a/crates/polars-time/src/windows/group_by.rs +++ b/crates/polars-time/src/windows/group_by.rs @@ -557,7 +557,9 @@ pub(crate) fn group_by_values_iter_lookahead_collected( } /// Different from `group_by_windows`, where define window buckets and search which values fit that -/// pre-defined bucket, this function defines every window based on the: +/// pre-defined bucket. 
+///
+/// This function defines every window based on the:
 /// - timestamp (lower bound)
 /// - timestamp + period (upper bound)
 /// where timestamps are the individual values in the array `time`
diff --git a/crates/polars-utils/Cargo.toml b/crates/polars-utils/Cargo.toml
index d8b2d0bc9f73..6e2ac16c6e85 100644
--- a/crates/polars-utils/Cargo.toml
+++ b/crates/polars-utils/Cargo.toml
@@ -16,11 +16,13 @@ bytemuck = { workspace = true }
 bytes = { workspace = true }
 hashbrown = { workspace = true }
 indexmap = { workspace = true }
+libc = { workspace = true }
 memmap = { workspace = true, optional = true }
 num-traits = { workspace = true }
 once_cell = { workspace = true }
 raw-cpuid = { workspace = true }
 rayon = { workspace = true }
+serde = { workspace = true, optional = true }
 smartstring = { workspace = true }
 stacker = { workspace = true }
 sysinfo = { version = "0.31", default-features = false, features = ["system"], optional = true }
@@ -35,3 +37,4 @@ version_check = { workspace = true }
 mmap = ["memmap"]
 bigidx = []
 nightly = []
+ir_serde = ["serde"]
diff --git a/crates/polars-utils/src/arena.rs b/crates/polars-utils/src/arena.rs
index 06741ff454fe..d5748725c4d1 100644
--- a/crates/polars-utils/src/arena.rs
+++ b/crates/polars-utils/src/arena.rs
@@ -1,5 +1,8 @@
 use std::sync::atomic::{AtomicU32, Ordering};
 
+#[cfg(feature = "ir_serde")]
+use serde::{Deserialize, Serialize};
+
 use crate::error::*;
 use crate::slice::GetSaferUnchecked;
@@ -21,6 +24,7 @@ fn index_of<T>(slice: &[T], item: &T) -> Option<usize> {
 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Ord, PartialOrd)]
 #[repr(transparent)]
+#[cfg_attr(feature = "ir_serde", derive(Serialize, Deserialize))]
 pub struct Node(pub usize);
 
 impl Default for Node {
@@ -32,6 +36,7 @@ impl Default for Node {
 static ARENA_VERSION: AtomicU32 = AtomicU32::new(0);
 
 #[derive(Debug, Clone)]
+#[cfg_attr(feature = "ir_serde", derive(Serialize, Deserialize))]
 pub struct Arena<T> {
     version: u32,
     items: Vec<T>,
diff --git a/crates/polars-utils/src/mem.rs b/crates/polars-utils/src/mem.rs
index d4f4e3d028fd..8462d1a57538 100644
--- a/crates/polars-utils/src/mem.rs
+++ b/crates/polars-utils/src/mem.rs
@@ -1,3 +1,15 @@
+use once_cell::sync::Lazy;
+static PAGE_SIZE: Lazy<usize> = Lazy::new(|| {
+    #[cfg(target_family = "unix")]
+    unsafe {
+        libc::sysconf(libc::_SC_PAGESIZE) as usize
+    }
+    #[cfg(not(target_family = "unix"))]
+    {
+        4096
+    }
+});
+
 /// # Safety
 /// This may break aliasing rules, make sure you are the only owner.
 #[allow(clippy::mut_from_ref)]
@@ -10,7 +22,7 @@ pub unsafe fn to_mutable_slice<T>(s: &[T]) -> &mut [T] {
 /// # Safety
 ///
 /// This should only be called with pointers to valid memory.
-pub unsafe fn prefetch_l2(ptr: *const u8) {
+unsafe fn prefetch_l2_impl(ptr: *const u8) {
     #[cfg(target_arch = "x86_64")]
     {
         use std::arch::x86_64::*;
@@ -23,3 +35,54 @@ pub unsafe fn prefetch_l2(ptr: *const u8) {
         unsafe { _prefetch(ptr as *const _, _PREFETCH_READ, _PREFETCH_LOCALITY2) };
     }
 }
+
+/// Attempt to prefetch the memory in the slice to the L2 cache.
+pub fn prefetch_l2(slice: &[u8]) {
+    if slice.is_empty() {
+        return;
+    }
+
+    // @TODO: We can play a bit more with this prefetching. Maybe introduce a maximum number of
+    // prefetches as to not overwhelm the processor. The linear prefetcher should pick it up
+    // at a certain point.
+
+    for i in (0..slice.len()).step_by(*PAGE_SIZE) {
+        unsafe { prefetch_l2_impl(slice[i..].as_ptr()) };
+    }
+
+    unsafe { prefetch_l2_impl(slice[slice.len() - 1..].as_ptr()) }
+}
+
+/// `madvise()` with `MADV_SEQUENTIAL` on unix systems.
This is a no-op on non-unix systems. +pub fn madvise_sequential(slice: &[u8]) { + #[cfg(target_family = "unix")] + madvise(slice, libc::MADV_SEQUENTIAL); +} + +/// `madvise()` with `MADV_WILLNEED` on unix systems. This is a no-op on non-unix systems. +pub fn madvise_willneed(slice: &[u8]) { + #[cfg(target_family = "unix")] + madvise(slice, libc::MADV_WILLNEED); +} + +/// `madvise()` with `MADV_POPULATE_READ` on linux systems. This a no-op on non-linux systems. +pub fn madvise_populate_read(#[allow(unused)] slice: &[u8]) { + #[cfg(target_os = "linux")] + madvise(slice, libc::MADV_POPULATE_READ); +} + +#[cfg(target_family = "unix")] +fn madvise(slice: &[u8], advice: libc::c_int) { + let ptr = slice.as_ptr(); + + let align = ptr as usize % *PAGE_SIZE; + let ptr = ptr.wrapping_sub(align); + let len = slice.len() + align; + + if unsafe { libc::madvise(ptr as *mut libc::c_void, len, advice) } != 0 { + let err = std::io::Error::last_os_error(); + if let std::io::ErrorKind::InvalidInput = err.kind() { + panic!("{}", err); + } + } +} diff --git a/crates/polars-utils/src/mmap.rs b/crates/polars-utils/src/mmap.rs index 5bd8e2df12a5..d8db6d0ae671 100644 --- a/crates/polars-utils/src/mmap.rs +++ b/crates/polars-utils/src/mmap.rs @@ -93,19 +93,7 @@ mod private { /// Attempt to prefetch the memory belonging to to this [`MemSlice`] #[inline] pub fn prefetch(&self) { - if self.len() == 0 { - return; - } - - // @TODO: We can play a bit more with this prefetching. Maybe introduce a maximum number of - // prefetches as to not overwhelm the processor. The linear prefetcher should pick it up - // at a certain point. - - const PAGE_SIZE: usize = 4096; - for i in 0..self.len() / PAGE_SIZE { - unsafe { prefetch_l2(self[i * PAGE_SIZE..].as_ptr()) }; - } - unsafe { prefetch_l2(self[self.len() - 1..].as_ptr()) } + prefetch_l2(self.as_ref()); } /// # Panics diff --git a/crates/polars/Cargo.toml b/crates/polars/Cargo.toml index dc1e5c952371..196f1832ada4 100644 --- a/crates/polars/Cargo.toml +++ b/crates/polars/Cargo.toml @@ -228,6 +228,7 @@ zip_with = ["polars-core/zip_with"] bigidx = ["polars-core/bigidx", "polars-lazy?/bigidx", "polars-ops/big_idx"] polars_cloud = ["polars-lazy?/polars_cloud"] +ir_serde = ["polars-plan/ir_serde"] test = [ "lazy", diff --git a/crates/polars/src/lib.rs b/crates/polars/src/lib.rs index 00086736c6e5..9910df124fa5 100644 --- a/crates/polars/src/lib.rs +++ b/crates/polars/src/lib.rs @@ -405,6 +405,7 @@ //! `T` in complex lazy expressions. However this does require `unsafe` code allow this. //! * `POLARS_NO_PARQUET_STATISTICS` -> if set, statistics in parquet files are ignored. //! * `POLARS_PANIC_ON_ERR` -> panic instead of returning an Error. +//! * `POLARS_BACKTRACE_IN_ERR` -> include a Rust backtrace in Error messages. //! * `POLARS_NO_CHUNKED_JOIN` -> force rechunk before joins. //! //! 
## User guide diff --git a/crates/polars/tests/it/io/parquet/arrow/mod.rs b/crates/polars/tests/it/io/parquet/arrow/mod.rs index 11bee66dba73..f5e0b2e39e3d 100644 --- a/crates/polars/tests/it/io/parquet/arrow/mod.rs +++ b/crates/polars/tests/it/io/parquet/arrow/mod.rs @@ -17,8 +17,6 @@ use polars_parquet::write::*; use super::read::file::FileReader; -type ArrayStats = (Box, Statistics); - fn new_struct( arrays: Vec>, names: Vec, @@ -32,33 +30,17 @@ fn new_struct( StructArray::new(ArrowDataType::Struct(fields), arrays, validity) } -pub fn read_column(mut reader: R, column: &str) -> PolarsResult { +pub fn read_column(mut reader: R, column: &str) -> PolarsResult> { let metadata = p_read::read_metadata(&mut reader)?; let schema = p_read::infer_schema(&metadata)?; - let row_group = &metadata.row_groups[0]; - - // verify that we can read indexes - if p_read::indexes::has_indexes(row_group) { - let _indexes = p_read::indexes::read_filtered_pages( - &mut reader, - row_group, - &schema.fields, - |_, _| vec![], - )?; - } - let schema = schema.filter(|_, f| f.name == column); - let field = &schema.fields[0]; - - let statistics = deserialize(field, row_group)?; - let mut reader = FileReader::new(reader, metadata.row_groups, schema, None); let array = reader.next().unwrap()?.into_arrays().pop().unwrap(); - Ok((array, statistics)) + Ok(array) } pub fn pyarrow_nested_edge(column: &str) -> Box { @@ -1300,10 +1282,6 @@ fn integration_read(data: &[u8], limit: Option) -> PolarsResult, ) -> PolarsResult<()> { - round_trip_opt_stats(column, file, version, compression, encodings, true) + round_trip_opt_stats(column, file, version, compression, encodings) } fn round_trip_opt_stats( @@ -18,9 +18,8 @@ fn round_trip_opt_stats( version: Version, compression: CompressionOptions, encodings: Vec, - check_stats: bool, ) -> PolarsResult<()> { - let (array, statistics) = match file { + let (array, _statistics) = match file { "nested" => ( pyarrow_nested_nullable(column), pyarrow_nested_nullable_statistics(column), @@ -68,12 +67,9 @@ fn round_trip_opt_stats( std::fs::write("list_struct_list_nullable.parquet", &data).unwrap(); - let (result, stats) = read_column(&mut Cursor::new(data), "a1")?; + let result = read_column(&mut Cursor::new(data), "a1")?; assert_eq!(array.as_ref(), result.as_ref()); - if check_stats { - assert_eq!(statistics, stats); - } Ok(()) } @@ -364,7 +360,6 @@ fn list_nested_inner_required_required_i64() -> PolarsResult<()> { Version::V1, CompressionOptions::Uncompressed, vec![Encoding::Plain], - false, ) } @@ -376,7 +371,6 @@ fn v1_nested_struct_list_nullable() -> PolarsResult<()> { Version::V1, CompressionOptions::Uncompressed, vec![Encoding::Plain], - true, ) } @@ -388,7 +382,6 @@ fn v1_nested_list_struct_list_nullable() -> PolarsResult<()> { Version::V1, CompressionOptions::Uncompressed, vec![Encoding::Plain], - true, ) } diff --git a/crates/polars/tests/it/io/parquet/read/indexes.rs b/crates/polars/tests/it/io/parquet/read/indexes.rs deleted file mode 100644 index e55c8b37a474..000000000000 --- a/crates/polars/tests/it/io/parquet/read/indexes.rs +++ /dev/null @@ -1,143 +0,0 @@ -use polars_parquet::parquet::error::ParquetError; -use polars_parquet::parquet::indexes::{ - BooleanIndex, BoundaryOrder, ByteIndex, Index, NativeIndex, PageIndex, PageLocation, -}; -use polars_parquet::parquet::read::{read_columns_indexes, read_metadata, read_pages_locations}; -use polars_parquet::parquet::schema::types::{ - FieldInfo, PhysicalType, PrimitiveConvertedType, PrimitiveLogicalType, PrimitiveType, -}; -use 
polars_parquet::parquet::schema::Repetition; - -/* -import pyspark.sql # 3.2.1 -spark = pyspark.sql.SparkSession.builder.getOrCreate() -spark.conf.set("parquet.bloom.filter.enabled", True) -spark.conf.set("parquet.bloom.filter.expected.ndv", 10) -spark.conf.set("parquet.bloom.filter.max.bytes", 32) - -data = [(i, f"{i}", False) for i in range(10)] -df = spark.createDataFrame(data, ["id", "string", "bool"]).repartition(1) - -df.write.parquet("bla.parquet", mode = "overwrite") -*/ -const FILE: &[u8] = &[ - 80, 65, 82, 49, 21, 0, 21, 172, 1, 21, 138, 1, 21, 169, 161, 209, 137, 5, 28, 21, 20, 21, 0, - 21, 6, 21, 8, 0, 0, 86, 24, 2, 0, 0, 0, 20, 1, 0, 13, 1, 17, 9, 1, 22, 1, 1, 0, 3, 1, 5, 12, 0, - 0, 0, 4, 1, 5, 12, 0, 0, 0, 5, 1, 5, 12, 0, 0, 0, 6, 1, 5, 12, 0, 0, 0, 7, 1, 5, 72, 0, 0, 0, - 8, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 21, 0, 21, 112, 21, 104, 21, 138, 239, 232, - 170, 15, 28, 21, 20, 21, 0, 21, 6, 21, 8, 0, 0, 56, 40, 2, 0, 0, 0, 20, 1, 1, 0, 0, 0, 48, 1, - 5, 0, 49, 1, 5, 0, 50, 1, 5, 0, 51, 1, 5, 0, 52, 1, 5, 0, 53, 1, 5, 60, 54, 1, 0, 0, 0, 55, 1, - 0, 0, 0, 56, 1, 0, 0, 0, 57, 21, 0, 21, 16, 21, 20, 21, 202, 209, 169, 227, 4, 28, 21, 20, 21, - 0, 21, 6, 21, 8, 0, 0, 8, 28, 2, 0, 0, 0, 20, 1, 0, 0, 25, 17, 2, 25, 24, 8, 0, 0, 0, 0, 0, 0, - 0, 0, 25, 24, 8, 9, 0, 0, 0, 0, 0, 0, 0, 21, 2, 25, 22, 0, 0, 25, 17, 2, 25, 24, 1, 48, 25, 24, - 1, 57, 21, 2, 25, 22, 0, 0, 25, 17, 2, 25, 24, 1, 0, 25, 24, 1, 0, 21, 2, 25, 22, 0, 0, 25, 28, - 22, 8, 21, 188, 1, 22, 0, 0, 0, 25, 28, 22, 196, 1, 21, 150, 1, 22, 0, 0, 0, 25, 28, 22, 218, - 2, 21, 66, 22, 0, 0, 0, 21, 64, 28, 28, 0, 0, 28, 28, 0, 0, 28, 28, 0, 0, 0, 24, 130, 24, 8, - 134, 8, 68, 6, 2, 101, 128, 10, 64, 2, 38, 78, 114, 1, 64, 38, 1, 192, 194, 152, 64, 70, 0, 36, - 56, 121, 64, 0, 21, 64, 28, 28, 0, 0, 28, 28, 0, 0, 28, 28, 0, 0, 0, 8, 17, 10, 29, 5, 88, 194, - 0, 35, 208, 25, 16, 70, 68, 48, 38, 17, 16, 140, 68, 98, 56, 0, 131, 4, 193, 40, 129, 161, 160, - 1, 96, 21, 64, 28, 28, 0, 0, 28, 28, 0, 0, 28, 28, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 2, 25, 76, 72, 12, 115, 112, - 97, 114, 107, 95, 115, 99, 104, 101, 109, 97, 21, 6, 0, 21, 4, 37, 2, 24, 2, 105, 100, 0, 21, - 12, 37, 2, 24, 6, 115, 116, 114, 105, 110, 103, 37, 0, 76, 28, 0, 0, 0, 21, 0, 37, 2, 24, 4, - 98, 111, 111, 108, 0, 22, 20, 25, 28, 25, 60, 38, 8, 28, 21, 4, 25, 53, 0, 6, 8, 25, 24, 2, - 105, 100, 21, 2, 22, 20, 22, 222, 1, 22, 188, 1, 38, 8, 60, 24, 8, 9, 0, 0, 0, 0, 0, 0, 0, 24, - 8, 0, 0, 0, 0, 0, 0, 0, 0, 22, 0, 40, 8, 9, 0, 0, 0, 0, 0, 0, 0, 24, 8, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 25, 28, 21, 0, 21, 0, 21, 2, 0, 22, 226, 4, 0, 22, 158, 4, 21, 22, 22, 156, 3, 21, 62, 0, - 38, 196, 1, 28, 21, 12, 25, 53, 0, 6, 8, 25, 24, 6, 115, 116, 114, 105, 110, 103, 21, 2, 22, - 20, 22, 158, 1, 22, 150, 1, 38, 196, 1, 60, 54, 0, 40, 1, 57, 24, 1, 48, 0, 25, 28, 21, 0, 21, - 0, 21, 2, 0, 22, 192, 5, 0, 22, 180, 4, 21, 24, 22, 218, 3, 21, 34, 0, 38, 218, 2, 28, 21, 0, - 25, 53, 0, 6, 8, 25, 24, 4, 98, 111, 111, 108, 21, 2, 22, 20, 22, 62, 22, 66, 38, 218, 2, 60, - 24, 1, 0, 24, 1, 0, 22, 0, 40, 1, 0, 24, 1, 0, 0, 25, 28, 21, 0, 21, 0, 21, 2, 0, 22, 158, 6, - 0, 22, 204, 4, 21, 22, 22, 252, 3, 21, 34, 0, 22, 186, 3, 22, 20, 38, 8, 22, 148, 3, 20, 0, 0, - 25, 44, 24, 24, 111, 114, 103, 46, 97, 112, 97, 99, 104, 101, 46, 115, 112, 97, 114, 107, 46, - 118, 101, 114, 115, 105, 111, 110, 24, 5, 51, 46, 50, 46, 49, 0, 24, 41, 111, 114, 103, 46, 97, - 112, 97, 99, 104, 101, 46, 115, 112, 97, 114, 107, 46, 
115, 113, 108, 46, 112, 97, 114, 113, - 117, 101, 116, 46, 114, 111, 119, 46, 109, 101, 116, 97, 100, 97, 116, 97, 24, 213, 1, 123, 34, - 116, 121, 112, 101, 34, 58, 34, 115, 116, 114, 117, 99, 116, 34, 44, 34, 102, 105, 101, 108, - 100, 115, 34, 58, 91, 123, 34, 110, 97, 109, 101, 34, 58, 34, 105, 100, 34, 44, 34, 116, 121, - 112, 101, 34, 58, 34, 108, 111, 110, 103, 34, 44, 34, 110, 117, 108, 108, 97, 98, 108, 101, 34, - 58, 116, 114, 117, 101, 44, 34, 109, 101, 116, 97, 100, 97, 116, 97, 34, 58, 123, 125, 125, 44, - 123, 34, 110, 97, 109, 101, 34, 58, 34, 115, 116, 114, 105, 110, 103, 34, 44, 34, 116, 121, - 112, 101, 34, 58, 34, 115, 116, 114, 105, 110, 103, 34, 44, 34, 110, 117, 108, 108, 97, 98, - 108, 101, 34, 58, 116, 114, 117, 101, 44, 34, 109, 101, 116, 97, 100, 97, 116, 97, 34, 58, 123, - 125, 125, 44, 123, 34, 110, 97, 109, 101, 34, 58, 34, 98, 111, 111, 108, 34, 44, 34, 116, 121, - 112, 101, 34, 58, 34, 98, 111, 111, 108, 101, 97, 110, 34, 44, 34, 110, 117, 108, 108, 97, 98, - 108, 101, 34, 58, 116, 114, 117, 101, 44, 34, 109, 101, 116, 97, 100, 97, 116, 97, 34, 58, 123, - 125, 125, 93, 125, 0, 24, 74, 112, 97, 114, 113, 117, 101, 116, 45, 109, 114, 32, 118, 101, - 114, 115, 105, 111, 110, 32, 49, 46, 49, 50, 46, 50, 32, 40, 98, 117, 105, 108, 100, 32, 55, - 55, 101, 51, 48, 99, 56, 48, 57, 51, 51, 56, 54, 101, 99, 53, 50, 99, 51, 99, 102, 97, 54, 99, - 51, 52, 98, 55, 101, 102, 51, 51, 50, 49, 51, 50, 50, 99, 57, 52, 41, 25, 60, 28, 0, 0, 28, 0, - 0, 28, 0, 0, 0, 182, 2, 0, 0, 80, 65, 82, 49, -]; - -#[test] -fn test() -> Result<(), ParquetError> { - let mut reader = std::io::Cursor::new(FILE); - - let expected_index = vec![ - Box::new(NativeIndex:: { - primitive_type: PrimitiveType::from_physical("id".to_string(), PhysicalType::Int64), - indexes: vec![PageIndex { - min: Some(0), - max: Some(9), - null_count: Some(0), - }], - boundary_order: BoundaryOrder::Ascending, - }) as Box, - Box::new(ByteIndex { - primitive_type: PrimitiveType { - field_info: FieldInfo { - name: "string".to_string(), - repetition: Repetition::Optional, - id: None, - }, - logical_type: Some(PrimitiveLogicalType::String), - converted_type: Some(PrimitiveConvertedType::Utf8), - physical_type: PhysicalType::ByteArray, - }, - indexes: vec![PageIndex { - min: Some(b"0".to_vec()), - max: Some(b"9".to_vec()), - null_count: Some(0), - }], - boundary_order: BoundaryOrder::Ascending, - }), - Box::new(BooleanIndex { - indexes: vec![PageIndex { - min: Some(false), - max: Some(false), - null_count: Some(0), - }], - boundary_order: BoundaryOrder::Ascending, - }), - ]; - let expected_page_locations = vec![ - vec![PageLocation { - offset: 4, - compressed_page_size: 94, - first_row_index: 0, - }], - vec![PageLocation { - offset: 98, - compressed_page_size: 75, - first_row_index: 0, - }], - vec![PageLocation { - offset: 173, - compressed_page_size: 33, - first_row_index: 0, - }], - ]; - - let metadata = read_metadata(&mut reader)?; - let columns = &metadata.row_groups[0].columns(); - - let indexes = read_columns_indexes(&mut reader, columns)?; - assert_eq!(&indexes, &expected_index); - - let pages = read_pages_locations(&mut reader, columns)?; - assert_eq!(pages, expected_page_locations); - - Ok(()) -} diff --git a/crates/polars/tests/it/io/parquet/read/mod.rs b/crates/polars/tests/it/io/parquet/read/mod.rs index f9e16619556c..73625107685f 100644 --- a/crates/polars/tests/it/io/parquet/read/mod.rs +++ b/crates/polars/tests/it/io/parquet/read/mod.rs @@ -6,7 +6,6 @@ mod boolean; mod dictionary; pub(crate) mod file; mod 
fixed_binary; -mod indexes; mod primitive; mod primitive_nested; pub(crate) mod row_group; @@ -159,6 +158,7 @@ where .map(|dict| dictionary::deserialize(&dict, column.physical_type())) .transpose()?; while let Some(page) = iterator.next().transpose()? { + let page = page.decompress(&mut iterator)?; if !has_filled { struct_::extend_validity(&mut validity, &page)?; } diff --git a/crates/polars/tests/it/io/parquet/read/primitive.rs b/crates/polars/tests/it/io/parquet/read/primitive.rs index d9665f353c53..960c502fb82d 100644 --- a/crates/polars/tests/it/io/parquet/read/primitive.rs +++ b/crates/polars/tests/it/io/parquet/read/primitive.rs @@ -26,7 +26,6 @@ impl<'a, T: NativeType> PageState<'a, T> { page: &'a DataPage, dict: Option<&'a PrimitivePageDict>, ) -> Result { - assert!(page.selected_rows().is_none()); NativePageState::try_new(page, dict).map(Self::Nominal) } } diff --git a/crates/polars/tests/it/io/parquet/write/binary.rs b/crates/polars/tests/it/io/parquet/write/binary.rs index bb9abc62c258..8176a42cbf83 100644 --- a/crates/polars/tests/it/io/parquet/write/binary.rs +++ b/crates/polars/tests/it/io/parquet/write/binary.rs @@ -83,6 +83,6 @@ pub fn array_to_page_v1( DataPageHeader::V1(header), CowBuffer::Owned(buffer), descriptor.clone(), - Some(array.len()), + array.len(), ))) } diff --git a/crates/polars/tests/it/io/parquet/write/indexes.rs b/crates/polars/tests/it/io/parquet/write/indexes.rs deleted file mode 100644 index 3f5f15c92828..000000000000 --- a/crates/polars/tests/it/io/parquet/write/indexes.rs +++ /dev/null @@ -1,100 +0,0 @@ -use std::io::Cursor; - -use polars_parquet::parquet::compression::CompressionOptions; -use polars_parquet::parquet::error::ParquetResult; -use polars_parquet::parquet::indexes::{ - BoundaryOrder, Index, NativeIndex, PageIndex, PageLocation, -}; -use polars_parquet::parquet::metadata::SchemaDescriptor; -use polars_parquet::parquet::read::{read_columns_indexes, read_metadata, read_pages_locations}; -use polars_parquet::parquet::schema::types::{ParquetType, PhysicalType, PrimitiveType}; -use polars_parquet::parquet::write::{ - Compressor, DynIter, DynStreamingIterator, FileWriter, Version, WriteOptions, -}; - -use super::primitive::array_to_page_v1; - -fn write_file() -> ParquetResult> { - let page1 = vec![Some(0), Some(1), None, Some(3), Some(4), Some(5), Some(6)]; - let page2 = vec![Some(10), Some(11)]; - - let options = WriteOptions { - write_statistics: true, - version: Version::V1, - }; - - let schema = SchemaDescriptor::new( - "schema".to_string(), - vec![ParquetType::from_physical( - "col1".to_string(), - PhysicalType::Int32, - )], - ); - - let pages = vec![ - array_to_page_v1::(&page1, &options, &schema.columns()[0].descriptor), - array_to_page_v1::(&page2, &options, &schema.columns()[0].descriptor), - ]; - - let pages = DynStreamingIterator::new(Compressor::new( - DynIter::new(pages.into_iter()), - CompressionOptions::Uncompressed, - vec![], - )); - let columns = std::iter::once(Ok(pages)); - - let writer = Cursor::new(vec![]); - let mut writer = FileWriter::new(writer, schema, options, None); - - writer.write(DynIter::new(columns))?; - writer.end(None)?; - - Ok(writer.into_inner().into_inner()) -} - -#[test] -fn read_indexes_and_locations() -> ParquetResult<()> { - let data = write_file()?; - let mut reader = Cursor::new(data); - - let metadata = read_metadata(&mut reader)?; - - let columns = &metadata.row_groups[0].columns(); - - let expected_page_locations = vec![vec![ - PageLocation { - offset: 4, - compressed_page_size: 63, - 
first_row_index: 0, - }, - PageLocation { - offset: 67, - compressed_page_size: 47, - first_row_index: 7, - }, - ]]; - let expected_index = vec![Box::new(NativeIndex:: { - primitive_type: PrimitiveType::from_physical("col1".to_string(), PhysicalType::Int32), - indexes: vec![ - PageIndex { - min: Some(0), - max: Some(6), - null_count: Some(1), - }, - PageIndex { - min: Some(10), - max: Some(11), - null_count: Some(0), - }, - ], - boundary_order: BoundaryOrder::Unordered, - }) as Box]; - - let indexes = read_columns_indexes(&mut reader, columns)?; - assert_eq!(&indexes, &expected_index); - - let pages = read_pages_locations(&mut reader, columns)?; - assert_eq!(pages, expected_page_locations); - - Ok(()) -} diff --git a/crates/polars/tests/it/io/parquet/write/mod.rs b/crates/polars/tests/it/io/parquet/write/mod.rs index 7f066fe726e4..9d1686ffdf87 100644 --- a/crates/polars/tests/it/io/parquet/write/mod.rs +++ b/crates/polars/tests/it/io/parquet/write/mod.rs @@ -1,5 +1,4 @@ mod binary; -mod indexes; mod primitive; mod sidecar; diff --git a/crates/polars/tests/it/io/parquet/write/primitive.rs b/crates/polars/tests/it/io/parquet/write/primitive.rs index 044925c5bb11..210bf0e6cefb 100644 --- a/crates/polars/tests/it/io/parquet/write/primitive.rs +++ b/crates/polars/tests/it/io/parquet/write/primitive.rs @@ -74,6 +74,6 @@ pub fn array_to_page_v1( DataPageHeader::V1(header), CowBuffer::Owned(buffer), descriptor.clone(), - Some(array.len()), + array.len(), ))) } diff --git a/docs/src/rust/user-guide/expressions/lists.rs b/docs/src/rust/user-guide/expressions/lists.rs index 530ae4d79892..c03824c7e368 100644 --- a/docs/src/rust/user-guide/expressions/lists.rs +++ b/docs/src/rust/user-guide/expressions/lists.rs @@ -141,7 +141,10 @@ fn main() -> Result<(), Box> { ListPrimitiveChunkedBuilder::new("Array_2", 8, 8, DataType::Int32); col2.append_slice(&[1, 7, 3]); col2.append_slice(&[8, 1, 0]); - let array_df = DataFrame::new([col1.finish(), col2.finish()].into())?; + let array_df = DataFrame::new(vec![ + col1.finish().into_series(), + col2.finish().into_series(), + ])?; println!("{}", &array_df); // --8<-- [end:array_df] diff --git a/py-polars/docs/source/reference/dataframe/export.rst b/py-polars/docs/source/reference/dataframe/export.rst index c9446dd2e2d3..8ebb005221eb 100644 --- a/py-polars/docs/source/reference/dataframe/export.rst +++ b/py-polars/docs/source/reference/dataframe/export.rst @@ -8,6 +8,7 @@ Export DataFrame data to other formats: .. autosummary:: :toctree: api/ + DataFrame.__array__ DataFrame.__arrow_c_stream__ DataFrame.__dataframe__ DataFrame.to_arrow diff --git a/py-polars/docs/source/reference/dataframe/modify_select.rst b/py-polars/docs/source/reference/dataframe/modify_select.rst index 11042e70c7bd..4d9e62556533 100644 --- a/py-polars/docs/source/reference/dataframe/modify_select.rst +++ b/py-polars/docs/source/reference/dataframe/modify_select.rst @@ -6,6 +6,7 @@ Manipulation/selection .. 
autosummary:: :toctree: api/ + DataFrame.__getitem__ DataFrame.bottom_k DataFrame.cast DataFrame.clear diff --git a/py-polars/docs/source/reference/expressions/aggregation.rst b/py-polars/docs/source/reference/expressions/aggregation.rst index d57b76618b31..05f4ce1fabfb 100644 --- a/py-polars/docs/source/reference/expressions/aggregation.rst +++ b/py-polars/docs/source/reference/expressions/aggregation.rst @@ -7,6 +7,9 @@ Aggregation :toctree: api/ Expr.agg_groups + Expr.all + Expr.any + Expr.approx_n_unique Expr.arg_max Expr.arg_min Expr.count @@ -18,6 +21,7 @@ Aggregation Expr.mean Expr.median Expr.min + Expr.n_unique Expr.nan_max Expr.nan_min Expr.product diff --git a/py-polars/docs/source/reference/expressions/col.rst b/py-polars/docs/source/reference/expressions/col.rst index 09b5c33e82f7..612e56e4cd63 100644 --- a/py-polars/docs/source/reference/expressions/col.rst +++ b/py-polars/docs/source/reference/expressions/col.rst @@ -2,7 +2,7 @@ polars.col ========== -Create an expression representing column(s) in a dataframe. +Create an expression representing column(s) in a DataFrame. ``col`` is technically not a function, but it can be used like one. diff --git a/py-polars/docs/source/reference/expressions/functions.rst b/py-polars/docs/source/reference/expressions/functions.rst index 4a8ca0425fca..9831d07cb803 100644 --- a/py-polars/docs/source/reference/expressions/functions.rst +++ b/py-polars/docs/source/reference/expressions/functions.rst @@ -97,7 +97,6 @@ These functions are available from the Polars module root and can be used as exp Expr.any Expr.approx_n_unique Expr.count - Expr.exclude Expr.first Expr.head Expr.implode diff --git a/py-polars/docs/source/reference/series/aggregation.rst b/py-polars/docs/source/reference/series/aggregation.rst index 2f6f8776ea34..fe74d9eb4fd0 100644 --- a/py-polars/docs/source/reference/series/aggregation.rst +++ b/py-polars/docs/source/reference/series/aggregation.rst @@ -8,6 +8,7 @@ Aggregation Series.arg_max Series.arg_min + Series.count Series.implode Series.max Series.mean diff --git a/py-polars/docs/source/reference/series/export.rst b/py-polars/docs/source/reference/series/export.rst index 2be5814ba201..06201adc2b1f 100644 --- a/py-polars/docs/source/reference/series/export.rst +++ b/py-polars/docs/source/reference/series/export.rst @@ -8,6 +8,7 @@ Export Series data to other formats: .. autosummary:: :toctree: api/ + Series.__array__ Series.__arrow_c_stream__ Series.to_arrow Series.to_frame diff --git a/py-polars/docs/source/reference/series/index.rst b/py-polars/docs/source/reference/series/index.rst index a8476da64b97..5e054f4a2366 100644 --- a/py-polars/docs/source/reference/series/index.rst +++ b/py-polars/docs/source/reference/series/index.rst @@ -19,6 +19,7 @@ This page gives an overview of all public Series methods. export list modify_select + operators miscellaneous plot string diff --git a/py-polars/docs/source/reference/series/modify_select.rst b/py-polars/docs/source/reference/series/modify_select.rst index d7ad90029349..3b15ec11ecb3 100644 --- a/py-polars/docs/source/reference/series/modify_select.rst +++ b/py-polars/docs/source/reference/series/modify_select.rst @@ -6,6 +6,7 @@ Manipulation/selection .. 
autosummary:: :toctree: api/ + Series.__getitem__ Series.alias Series.append Series.arg_sort diff --git a/py-polars/docs/source/reference/series/operators.rst b/py-polars/docs/source/reference/series/operators.rst new file mode 100644 index 000000000000..e01c1b39e9de --- /dev/null +++ b/py-polars/docs/source/reference/series/operators.rst @@ -0,0 +1,31 @@ +========= +Operators +========= + +Polars supports native Python operators for all common operations; +many of these operators are also available as methods on the :class:`Series` +class. + +Comparison +~~~~~~~~~~ + +.. currentmodule:: polars +.. autosummary:: + :toctree: api/ + + Series.eq + Series.eq_missing + Series.ge + Series.gt + Series.le + Series.lt + Series.ne + Series.ne_missing + +Numeric +~~~~~~~ + +.. autosummary:: + :toctree: api/ + + Series.pow diff --git a/py-polars/polars/_typing.py b/py-polars/polars/_typing.py index 428c13da0e96..9b0cc722de57 100644 --- a/py-polars/polars/_typing.py +++ b/py-polars/polars/_typing.py @@ -70,6 +70,7 @@ def __arrow_c_stream__(self, requested_schema: object | None = None) -> object: Type[List[Any]], Type[Tuple[Any, ...]], Type[bytes], + Type[object], Type["Decimal"], Type[None], ] diff --git a/py-polars/polars/_utils/cloud.py b/py-polars/polars/_utils/cloud.py index 5b427fce4059..62d1dfd3b6ec 100644 --- a/py-polars/polars/_utils/cloud.py +++ b/py-polars/polars/_utils/cloud.py @@ -3,17 +3,13 @@ from typing import TYPE_CHECKING import polars.polars as plr -from polars._utils.various import normalize_filepath if TYPE_CHECKING: - from pathlib import Path - from polars import LazyFrame def prepare_cloud_plan( lf: LazyFrame, - uri: Path | str, **optimizations: bool, ) -> bytes: """ @@ -23,9 +19,6 @@ def prepare_cloud_plan( ---------- lf The LazyFrame to prepare. - uri - Path to which the file should be written. - Must be a URI to an accessible object store location. **optimizations Optimizations to enable or disable in the query optimizer, e.g. `projection_pushdown=False`. @@ -41,6 +34,5 @@ def prepare_cloud_plan( ComputeError If the given LazyFrame cannot be serialized. """ - uri = normalize_filepath(uri) pylf = lf._set_sink_optimizations(**optimizations) - return plr.prepare_cloud_plan(pylf, uri) + return plr.prepare_cloud_plan(pylf) diff --git a/py-polars/polars/_utils/construction/series.py b/py-polars/polars/_utils/construction/series.py index f13b9f5b0ec5..379bdbeb0a30 100644 --- a/py-polars/polars/_utils/construction/series.py +++ b/py-polars/polars/_utils/construction/series.py @@ -179,7 +179,7 @@ def sequence_to_pyseries( python_dtype = type(value) # temporal branch - if python_dtype in py_temporal_types: + if issubclass(python_dtype, tuple(py_temporal_types)): if dtype is None: dtype = parse_into_dtype(python_dtype) # construct from integer elif dtype in py_temporal_types: diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 427ac0031d56..53eb82e5342e 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -1218,7 +1218,130 @@ def __getitem__( | tuple[MultiIndexSelector, MultiColSelector] ), ) -> DataFrame | Series | Any: - """Get part of the DataFrame as a new DataFrame, Series, or scalar.""" + """ + Get part of the DataFrame as a new DataFrame, Series, or scalar. + + Parameters + ---------- + key + Rows / columns to select. This is easiest to explain via example. Suppose + we have a DataFrame with columns `'a'`, `'d'`, `'c'`, `'d'`. 
Here is what + various types of `key` would do: + + - `df[0, 'a']` extracts the first element of column `'a'` and returns a + scalar. + - `df[0]` extracts the first row and returns a Dataframe. + - `df['a']` extracts column `'a'` and returns a Series. + - `df[0:2]` extracts the first two rows and returns a Dataframe. + - `df[0:2, 'a']` extracts the first two rows from column `'a'` and returns + a Series. + - `df[0:2, 0]` extracts the first two rows from the first column and returns + a Series. + - `df[[0, 1], [0, 1, 2]]` extracts the first two rows and the first three + columns and returns a Dataframe. + - `df[0: 2, ['a', 'c']]` extracts the first two rows from columns `'a'` and + `'c'` and returns a Dataframe. + - `df[:, 0: 2]` extracts all rows from the first two columns and returns a + Dataframe. + - `df[:, 'a': 'c']` extracts all rows and all columns positioned between + `'a'` and `'c'` *inclusive* and returns a Dataframe. In our example, + that would extract columns `'a'`, `'d'`, and `'c'`. + + Returns + ------- + DataFrame, Series, or scalar, depending on `key`. + + Examples + -------- + >>> df = pl.DataFrame( + ... {"a": [1, 2, 3], "d": [4, 5, 6], "c": [1, 3, 2], "b": [7, 8, 9]} + ... ) + >>> df[0] + shape: (1, 4) + ┌─────┬─────┬─────┬─────┐ + │ a ┆ d ┆ c ┆ b │ + │ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i64 ┆ i64 │ + ╞═════╪═════╪═════╪═════╡ + │ 1 ┆ 4 ┆ 1 ┆ 7 │ + └─────┴─────┴─────┴─────┘ + >>> df[0, "a"] + 1 + >>> df["a"] + shape: (3,) + Series: 'a' [i64] + [ + 1 + 2 + 3 + ] + >>> df[0:2] + shape: (2, 4) + ┌─────┬─────┬─────┬─────┐ + │ a ┆ d ┆ c ┆ b │ + │ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i64 ┆ i64 │ + ╞═════╪═════╪═════╪═════╡ + │ 1 ┆ 4 ┆ 1 ┆ 7 │ + │ 2 ┆ 5 ┆ 3 ┆ 8 │ + └─────┴─────┴─────┴─────┘ + >>> df[0:2, "a"] + shape: (2,) + Series: 'a' [i64] + [ + 1 + 2 + ] + >>> df[0:2, 0] + shape: (2,) + Series: 'a' [i64] + [ + 1 + 2 + ] + >>> df[[0, 1], [0, 1, 2]] + shape: (2, 3) + ┌─────┬─────┬─────┐ + │ a ┆ d ┆ c │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i64 │ + ╞═════╪═════╪═════╡ + │ 1 ┆ 4 ┆ 1 │ + │ 2 ┆ 5 ┆ 3 │ + └─────┴─────┴─────┘ + >>> df[0:2, ["a", "c"]] + shape: (2, 2) + ┌─────┬─────┐ + │ a ┆ c │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 1 ┆ 1 │ + │ 2 ┆ 3 │ + └─────┴─────┘ + >>> df[:, 0:2] + shape: (3, 2) + ┌─────┬─────┐ + │ a ┆ d │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 1 ┆ 4 │ + │ 2 ┆ 5 │ + │ 3 ┆ 6 │ + └─────┴─────┘ + >>> df[:, "a":"c"] + shape: (3, 3) + ┌─────┬─────┬─────┐ + │ a ┆ d ┆ c │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i64 │ + ╞═════╪═════╪═════╡ + │ 1 ┆ 4 ┆ 1 │ + │ 2 ┆ 5 ┆ 3 │ + │ 3 ┆ 6 ┆ 2 │ + └─────┴─────┴─────┘ + """ return get_df_item_by_key(self, key) def __setitem__( @@ -6462,7 +6585,7 @@ def join_asof( tolerance: str | int | float | timedelta | None = None, allow_parallel: bool = True, force_parallel: bool = False, - coalesce: bool | None = None, + coalesce: bool = True, ) -> DataFrame: """ Perform an asof join. @@ -6540,9 +6663,8 @@ def join_asof( Force the physical plan to evaluate the computation of both DataFrames up to the join in parallel. coalesce - Coalescing behavior (merging of join columns). + Coalescing behavior (merging of `on` / `left_on` / `right_on` columns): - - None: -> join specific. - True: -> Always coalesce join columns. - False: -> Never coalesce join columns. @@ -6616,6 +6738,20 @@ def join_asof( - date `2016-03-01` from `population` is matched with `2016-01-01` from `gdp`; - date `2018-08-01` from `population` is matched with `2018-01-01` from `gdp`. 
+ You can verify this by passing `coalesce=False`: + + >>> population.join_asof(gdp, on="date", strategy="backward", coalesce=False) + shape: (3, 4) + ┌────────────┬────────────┬────────────┬──────┐ + │ date ┆ population ┆ date_right ┆ gdp │ + │ --- ┆ --- ┆ --- ┆ --- │ + │ date ┆ f64 ┆ date ┆ i64 │ + ╞════════════╪════════════╪════════════╪══════╡ + │ 2016-03-01 ┆ 82.19 ┆ 2016-01-01 ┆ 4164 │ + │ 2018-08-01 ┆ 82.66 ┆ 2018-01-01 ┆ 4566 │ + │ 2019-01-01 ┆ 83.12 ┆ 2019-01-01 ┆ 4696 │ + └────────────┴────────────┴────────────┴──────┘ + If we instead use `strategy='forward'`, then each date from `population` which doesn't have an exact match is matched with the closest later date from `gdp`: diff --git a/py-polars/polars/datatypes/_parse.py b/py-polars/polars/datatypes/_parse.py index 2649bc7905ec..e7ac78cae6dd 100644 --- a/py-polars/polars/datatypes/_parse.py +++ b/py-polars/polars/datatypes/_parse.py @@ -76,10 +76,10 @@ def parse_py_type_into_dtype(input: PythonDataType | type[object]) -> PolarsData return String() elif input is bool: return Boolean() - elif input is date: - return Date() - elif input is datetime: + elif isinstance(input, type) and issubclass(input, datetime): # type: ignore[redundant-expr] return Datetime("us") + elif isinstance(input, type) and issubclass(input, date): # type: ignore[redundant-expr] + return Date() elif input is timedelta: return Duration elif input is time: @@ -97,16 +97,14 @@ def parse_py_type_into_dtype(input: PythonDataType | type[object]) -> PolarsData # this is required as pass through. Don't remove elif input == Unknown: return Unknown - elif hasattr(input, "__origin__") and hasattr(input, "__args__"): return _parse_generic_into_dtype(input) - else: _raise_on_invalid_dtype(input) def _parse_generic_into_dtype(input: Any) -> PolarsDataType: - """Parse a generic type into a Polars data type.""" + """Parse a generic type (from typing annotation) into a Polars data type.""" base_type = input.__origin__ if base_type not in (tuple, list): _raise_on_invalid_dtype(input) @@ -124,19 +122,19 @@ def _parse_generic_into_dtype(input: Any) -> PolarsDataType: PY_TYPE_STR_TO_DTYPE: SchemaDict = { - "int": Int64(), - "float": Float64(), + "Decimal": Decimal, + "NoneType": Null(), "bool": Boolean(), - "str": String(), "bytes": Binary(), "date": Date(), - "time": Time(), "datetime": Datetime("us"), + "float": Float64(), + "int": Int64(), + "list": List, "object": Object(), - "NoneType": Null(), + "str": String(), + "time": Time(), "timedelta": Duration, - "Decimal": Decimal, - "list": List, "tuple": List, } @@ -177,5 +175,7 @@ def _parse_union_type_into_dtype(input: Any) -> PolarsDataType: def _raise_on_invalid_dtype(input: Any) -> NoReturn: """Raise an informative error if the input could not be parsed.""" - msg = f"cannot parse input of type {type(input).__name__!r} into Polars data type: {input!r}" + input_type = input if type(input) is type else f"of type {type(input).__name__!r}" + input_detail = "" if type(input) is type else f" (given: {input!r})" + msg = f"cannot parse input {input_type} into Polars data type{input_detail}" raise TypeError(msg) from None diff --git a/py-polars/polars/datatypes/classes.py b/py-polars/polars/datatypes/classes.py index 08aeb53c5674..b815d7d17608 100644 --- a/py-polars/polars/datatypes/classes.py +++ b/py-polars/polars/datatypes/classes.py @@ -83,6 +83,14 @@ def is_temporal(cls) -> bool: # noqa: D102 def is_nested(cls) -> bool: # noqa: D102 ... 
+ @classmethod + def from_python(cls, py_type: PythonDataType) -> PolarsDataType: # noqa: D102 + ... + + @classmethod + def to_python(self) -> PythonDataType: # noqa: D102 + ... + class DataType(metaclass=DataTypeClass): """Base class for all Polars data types.""" @@ -180,6 +188,49 @@ def is_nested(cls) -> bool: """Check whether the data type is a nested type.""" return issubclass(cls, NestedType) + @classmethod + def from_python(cls, py_type: PythonDataType) -> PolarsDataType: + """ + Return the Polars data type corresponding to a given Python type. + + Notes + ----- + Not every Python type has a corresponding Polars data type; in general + you should declare Polars data types explicitly to exactly specify + the desired type and its properties (such as scale/unit). + + Examples + -------- + >>> pl.DataType.from_python(int) + Int64 + >>> pl.DataType.from_python(float) + Float64 + >>> from datetime import tzinfo + >>> pl.DataType.from_python(tzinfo) # doctest: +SKIP + TypeError: cannot parse input into Polars data type + """ + from polars.datatypes._parse import parse_into_dtype + + return parse_into_dtype(py_type) + + @classinstmethod # type: ignore[arg-type] + def to_python(self) -> PythonDataType: + """ + Return the Python type corresponding to this Polars data type. + + Examples + -------- + >>> pl.Int16().to_python() + + >>> pl.Float32().to_python() + + >>> pl.Array(pl.Date(), 10).to_python() + + """ + from polars.datatypes import dtype_to_py_type + + return dtype_to_py_type(self) + class NumericType(DataType): """Base class for numeric data types.""" diff --git a/py-polars/polars/datatypes/convert.py b/py-polars/polars/datatypes/convert.py index a965422c7530..1b0806b2ea75 100644 --- a/py-polars/polars/datatypes/convert.py +++ b/py-polars/polars/datatypes/convert.py @@ -19,6 +19,7 @@ Datetime, Decimal, Duration, + Enum, Field, Float32, Float64, @@ -134,55 +135,60 @@ class _DataTypeMappings: @functools.lru_cache # noqa: B019 def DTYPE_TO_FFINAME(self) -> dict[PolarsDataType, str]: return { - Int8: "i8", - Int16: "i16", - Int32: "i32", - Int64: "i64", - UInt8: "u8", - UInt16: "u16", - UInt32: "u32", - UInt64: "u64", - Float32: "f32", - Float64: "f64", - Decimal: "decimal", + Binary: "binary", Boolean: "bool", - String: "str", - List: "list", + Categorical: "categorical", Date: "date", Datetime: "datetime", + Decimal: "decimal", Duration: "duration", - Time: "time", + Float32: "f32", + Float64: "f64", + Int16: "i16", + Int32: "i32", + Int64: "i64", + Int8: "i8", + List: "list", Object: "object", - Categorical: "categorical", + String: "str", Struct: "struct", - Binary: "binary", + Time: "time", + UInt16: "u16", + UInt32: "u32", + UInt64: "u64", + UInt8: "u8", } @property @functools.lru_cache # noqa: B019 def DTYPE_TO_PY_TYPE(self) -> dict[PolarsDataType, PythonDataType]: return { - Float64: float, + Array: list, + Binary: bytes, + Boolean: bool, + Date: date, + Datetime: datetime, + Decimal: PyDecimal, + Duration: timedelta, Float32: float, - Int64: int, - Int32: int, + Float64: float, Int16: int, + Int32: int, + Int64: int, Int8: int, + List: list, + Null: None.__class__, + Object: object, String: str, - UInt8: int, + Struct: dict, + Time: time, UInt16: int, UInt32: int, UInt64: int, - Decimal: PyDecimal, - Boolean: bool, - Duration: timedelta, - Datetime: datetime, - Date: date, - Time: time, - Binary: bytes, - List: list, - Array: list, - Null: None.__class__, + UInt8: int, + # the below mappings are appropriate as we restrict cat/enum to strings + Enum: str, + Categorical: str, } 
@property @@ -190,32 +196,32 @@ def DTYPE_TO_PY_TYPE(self) -> dict[PolarsDataType, PythonDataType]: def NUMPY_KIND_AND_ITEMSIZE_TO_DTYPE(self) -> dict[tuple[str, int], PolarsDataType]: return { # (np.dtype().kind, np.dtype().itemsize) + ("M", 8): Datetime, ("b", 1): Boolean, + ("f", 4): Float32, + ("f", 8): Float64, ("i", 1): Int8, ("i", 2): Int16, ("i", 4): Int32, ("i", 8): Int64, + ("m", 8): Duration, ("u", 1): UInt8, ("u", 2): UInt16, ("u", 4): UInt32, ("u", 8): UInt64, - ("f", 4): Float32, - ("f", 8): Float64, - ("m", 8): Duration, - ("M", 8): Datetime, } @property @functools.lru_cache # noqa: B019 def PY_TYPE_TO_ARROW_TYPE(self) -> dict[PythonDataType, pa.lib.DataType]: return { + bool: pa.bool_(), + date: pa.date32(), + datetime: pa.timestamp("us"), float: pa.float64(), int: pa.int64(), str: pa.large_utf8(), - bool: pa.bool_(), - date: pa.date32(), time: pa.time64("us"), - datetime: pa.timestamp("us"), timedelta: pa.duration("us"), None.__class__: pa.null(), } @@ -338,7 +344,7 @@ def maybe_cast(el: Any, dtype: PolarsDataType) -> Any: py_type = dtype_to_py_type(dtype) if not isinstance(el, py_type): try: - el = py_type(el) # type: ignore[call-arg, misc] + el = py_type(el) # type: ignore[call-arg] except Exception: msg = f"cannot convert Python type {type(el).__name__!r} to {dtype!r}" raise TypeError(msg) from None diff --git a/py-polars/polars/expr/binary.py b/py-polars/polars/expr/binary.py index cac394aa457a..7ea6dc4d79ea 100644 --- a/py-polars/polars/expr/binary.py +++ b/py-polars/polars/expr/binary.py @@ -257,15 +257,20 @@ def size(self, unit: SizeUnit = "b") -> Expr: r""" Get the size of binary values in the given unit. + Parameters + ---------- + unit : {'b', 'kb', 'mb', 'gb', 'tb'} + Scale the returned size to the given unit. + Returns ------- Expr - Expression of data type :class:`UInt32`. + Expression of data type :class:`UInt32` or `Float64`. Examples -------- >>> from os import urandom - >>> df = pl.DataFrame({"data": [urandom(n) for n in (512, 256, 2560, 1024)]}) + >>> df = pl.DataFrame({"data": [urandom(n) for n in (512, 256, 1024)]}) >>> df.with_columns( # doctest: +IGNORE_RESULT ... n_bytes=pl.col("data").bin.size(), ... n_kilobytes=pl.col("data").bin.size("kb"), @@ -278,7 +283,6 @@ def size(self, unit: SizeUnit = "b") -> Expr: ╞═════════════════════════════════╪═════════╪═════════════╡ │ b"y?~B\x83\xf4V\x07\xd3\xfb\xb… ┆ 512 ┆ 0.5 │ │ b"\xee$4@f\xc14\x07\x8e\x88\x1… ┆ 256 ┆ 0.25 │ - │ b"~\x17\x9c\xb1\xf4\xdb?\xe9\x… ┆ 2560 ┆ 2.5 │ │ b"\x80\xbd\xb9nEq;2\x99$\xf9\x… ┆ 1024 ┆ 1.0 │ └─────────────────────────────────┴─────────┴─────────────┘ """ diff --git a/py-polars/polars/expr/datetime.py b/py-polars/polars/expr/datetime.py index cdf6ccb6516f..9a03b46b12d3 100644 --- a/py-polars/polars/expr/datetime.py +++ b/py-polars/polars/expr/datetime.py @@ -284,10 +284,12 @@ def round(self, every: str | dt.timedelta | IntoExprColumn) -> Expr: This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. - Each date/datetime in the first half of the interval - is mapped to the start of its bucket. - Each date/datetime in the second half of the interval - is mapped to the end of its bucket. + - Each date/datetime in the first half of the interval + is mapped to the start of its bucket. + - Each date/datetime in the second half of the interval + is mapped to the end of its bucket. + - Half-way points are mapped to the start of their bucket. 
+ Ambiguous results are localised using the DST offset of the original timestamp - for example, rounding `'2022-11-06 01:20:00 CST'` by `'1h'` results in `'2022-11-06 01:00:00 CST'`, whereas rounding `'2022-11-06 01:20:00 CDT'` by diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index 9acae4b745c9..54c9ba55e09d 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -681,9 +681,9 @@ def alias(self, name: str) -> Expr: See Also -------- - map - prefix - suffix + name.map + name.prefix + name.suffix Examples -------- @@ -4300,14 +4300,14 @@ def map_batches( Dtype of the output Series. If not set, the dtype will be inferred based on the first non-null value that is returned by the function. - is_elementwise - If set to true this can run in the streaming engine, but may yield - incorrect results in group-by. Ensure you know what you are doing! agg_list Aggregate the values of the expression into a list before applying the function. This parameter only works in a group-by context. The function will be invoked only once on a list of groups, rather than once per group. + is_elementwise + If set to true this can run in the streaming engine, but may yield + incorrect results in group-by. Ensure you know what you are doing! returns_scalar If the function returns a scalar, by default it will be wrapped in a list in the output, since the assumption is that the function @@ -4745,7 +4745,7 @@ def flatten(self) -> Expr: """ Flatten a list or string column. - Alias for :func:`polars.expr.list.ExprListNameSpace.explode`. + Alias for :func:`Expr.list.explode`. Examples -------- @@ -4885,7 +4885,7 @@ def head(self, n: int | Expr = 10) -> Expr: Examples -------- >>> df = pl.DataFrame({"foo": [1, 2, 3, 4, 5, 6, 7]}) - >>> df.head(3) + >>> df.select(pl.col("foo").head(3)) shape: (3, 1) ┌─────┐ │ foo │ @@ -4911,7 +4911,7 @@ def tail(self, n: int | Expr = 10) -> Expr: Examples -------- >>> df = pl.DataFrame({"foo": [1, 2, 3, 4, 5, 6, 7]}) - >>> df.tail(3) + >>> df.select(pl.col("foo").tail(3)) shape: (3, 1) ┌─────┐ │ foo │ @@ -4942,7 +4942,7 @@ def limit(self, n: int | Expr = 10) -> Expr: Examples -------- >>> df = pl.DataFrame({"foo": [1, 2, 3, 4, 5, 6, 7]}) - >>> df.limit(3) + >>> df.select(pl.col("foo").limit(3)) shape: (3, 1) ┌─────┐ │ foo │ @@ -9213,6 +9213,9 @@ def shuffle(self, seed: int | None = None) -> Expr: """ Shuffle the contents of this expression. + Note this is shuffled independently of any other column or Expression. If you + want each row to stay the same use df.sample(shuffle=True) + Parameters ---------- seed diff --git a/py-polars/polars/expr/list.py b/py-polars/polars/expr/list.py index 390904997697..5655b58c86cb 100644 --- a/py-polars/polars/expr/list.py +++ b/py-polars/polars/expr/list.py @@ -1005,7 +1005,7 @@ def explode(self) -> Expr: See Also -------- - ExprNameSpace.reshape: Reshape this Expr to a flat Series or a Series of Lists. + Expr.reshape: Reshape this Expr to a flat Series or a Series of Lists. Examples -------- diff --git a/py-polars/polars/expr/name.py b/py-polars/polars/expr/name.py index 9c730d2d3206..8b6fe24d8dea 100644 --- a/py-polars/polars/expr/name.py +++ b/py-polars/polars/expr/name.py @@ -286,17 +286,22 @@ def to_uppercase(self) -> Expr: def map_fields(self, function: Callable[[str], str]) -> Expr: """ - Rename fields of a struct by mapping a function over the field name. + Rename fields of a struct by mapping a function over the field name(s). Notes ----- - This only take effects for struct. 
+ This only takes effect for struct columns. Parameters ---------- function Function that maps a field name to a new name. + See Also + -------- + prefix_fields + suffix_fields + Examples -------- >>> df = pl.DataFrame({"x": {"a": 1, "b": 2}}) @@ -307,16 +312,21 @@ def map_fields(self, function: Callable[[str], str]) -> Expr: def prefix_fields(self, prefix: str) -> Expr: """ - Add a prefix to all fields name of a struct. + Add a prefix to all field names of a struct. Notes ----- - This only take effects for struct. + This only takes effect for struct columns. Parameters ---------- prefix - Prefix to add to the filed name + Prefix to add to the field name. + + See Also + -------- + map_fields + suffix_fields Examples -------- @@ -328,16 +338,21 @@ def prefix_fields(self, prefix: str) -> Expr: def suffix_fields(self, suffix: str) -> Expr: """ - Add a suffix to all fields name of a struct. + Add a suffix to all field names of a struct. Notes ----- - This only take effects for struct. + This only takes effect for struct columns. Parameters ---------- suffix - Suffix to add to the filed name + Suffix to add to the field name. + + See Also + -------- + map_fields + prefix_fields Examples -------- diff --git a/py-polars/polars/expr/string.py b/py-polars/polars/expr/string.py index d6232f7ff19b..bf8e739462b2 100644 --- a/py-polars/polars/expr/string.py +++ b/py-polars/polars/expr/string.py @@ -923,7 +923,7 @@ def contains( self, pattern: str | Expr, *, literal: bool = False, strict: bool = True ) -> Expr: """ - Check if string contains a substring that matches a pattern. + Check if the string contains a substring that matches a pattern. Parameters ---------- @@ -1034,7 +1034,7 @@ def find( See Also -------- - contains : Check if string contains a substring that matches a regex. + contains : Check if the string contains a substring that matches a pattern. Examples -------- @@ -1093,7 +1093,7 @@ def ends_with(self, suffix: str | Expr) -> Expr: See Also -------- - contains : Check if string contains a substring that matches a regex. + contains : Check if the string contains a substring that matches a pattern. starts_with : Check if string values start with a substring. Examples @@ -1156,7 +1156,7 @@ def starts_with(self, prefix: str | Expr) -> Expr: See Also -------- - contains : Check if string contains a substring that matches a regex. + contains : Check if the string contains a substring that matches a pattern. ends_with : Check if string values end with a substring. 
Examples diff --git a/py-polars/polars/functions/lit.py b/py-polars/polars/functions/lit.py index 700c65dbee7c..8853963cbeed 100644 --- a/py-polars/polars/functions/lit.py +++ b/py-polars/polars/functions/lit.py @@ -6,12 +6,8 @@ from typing import TYPE_CHECKING, Any import polars._reexport as pl -from polars._utils.convert import ( - time_to_int, - timedelta_to_int, -) from polars._utils.wrap import wrap_expr -from polars.datatypes import Date, Datetime, Duration, Enum, Time +from polars.datatypes import Date, Datetime, Duration, Enum from polars.dependencies import _check_for_numpy from polars.dependencies import numpy as np @@ -114,17 +110,13 @@ def lit( return expr elif isinstance(value, timedelta): - if dtype is not None and (tu := getattr(dtype, "time_unit", "us")) is not None: - time_unit = tu # type: ignore[assignment] - else: - time_unit = "us" - - td_int = timedelta_to_int(value, time_unit) - return lit(td_int).cast(Duration(time_unit)) + expr = wrap_expr(plr.lit(value, allow_object=False)) + if dtype is not None and (tu := getattr(dtype, "time_unit", None)) is not None: + expr = expr.cast(Duration(tu)) + return expr elif isinstance(value, time): - time_int = time_to_int(value) - return lit(time_int).cast(Time) + return wrap_expr(plr.lit(value, allow_object=False)) elif isinstance(value, date): if dtype == Datetime: diff --git a/py-polars/polars/io/database/_executor.py b/py-polars/polars/io/database/_executor.py index ef044d70d139..0c8513ff2f4d 100644 --- a/py-polars/polars/io/database/_executor.py +++ b/py-polars/polars/io/database/_executor.py @@ -384,7 +384,7 @@ def _normalise_cursor(self, conn: Any) -> Cursor: return conn.engine.raw_connection().cursor() elif conn.engine.driver == "duckdb_engine": self.driver_name = "duckdb" - return conn.engine.raw_connection().driver_connection.c + return conn.engine.raw_connection().driver_connection elif self._is_alchemy_engine(conn): # note: if we create it, we can close it self.can_close_cursor = True diff --git a/py-polars/polars/lazyframe/engine_config.py b/py-polars/polars/lazyframe/engine_config.py index 8dd75ebc48b6..ee6c2f8b7941 100644 --- a/py-polars/polars/lazyframe/engine_config.py +++ b/py-polars/polars/lazyframe/engine_config.py @@ -18,7 +18,7 @@ class GPUEngine: - `device`: Select the device to run the query on. - `memory_resource`: Set an RMM memory resource for - device-side allocations. + device-side allocations. """ device: int | None diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 8ce2a1d2e362..ff4ab963faaf 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -2,6 +2,7 @@ import contextlib import os +import warnings from datetime import date, datetime, time, timedelta from functools import lru_cache, partial, reduce from io import BytesIO, StringIO @@ -41,6 +42,7 @@ _in_notebook, _is_generator, extend_bool, + find_stacklevel, is_bool_sequence, is_sequence, issue_warning, @@ -680,7 +682,7 @@ def serialize( The format in which to serialize. Options: - `"binary"`: Serialize to binary format (bytes). This is the default. - - `"json"`: Serialize to JSON format (string). + - `"json"`: Serialize to JSON format (string) (deprecated). 
See Also -------- @@ -716,6 +718,11 @@ def serialize( if format == "binary": serializer = self._ldf.serialize_binary elif format == "json": + msg = "'json' serialization format of LazyFrame is deprecated" + warnings.warn( + msg, + stacklevel=find_stacklevel(), + ) serializer = self._ldf.serialize_json else: msg = f"`format` must be one of {{'binary', 'json'}}, got {format!r}" @@ -3993,7 +4000,7 @@ def join_asof( tolerance: str | int | float | timedelta | None = None, allow_parallel: bool = True, force_parallel: bool = False, - coalesce: bool | None = None, + coalesce: bool = True, ) -> LazyFrame: """ Perform an asof join. @@ -4071,53 +4078,214 @@ def join_asof( Force the physical plan to evaluate the computation of both DataFrames up to the join in parallel. coalesce - Coalescing behavior (merging of join columns). + Coalescing behavior (merging of `on` / `left_on` / `right_on` columns): - - None: -> join specific. - True: -> Always coalesce join columns. - False: -> Never coalesce join columns. Note that joining on any other expressions than `col` will turn off coalescing. - Examples -------- - >>> from datetime import datetime + >>> from datetime import date >>> gdp = pl.LazyFrame( ... { - ... "date": [ - ... datetime(2016, 1, 1), - ... datetime(2017, 1, 1), - ... datetime(2018, 1, 1), - ... datetime(2019, 1, 1), - ... ], # note record date: Jan 1st (sorted!) - ... "gdp": [4164, 4411, 4566, 4696], + ... "date": pl.date_range( + ... date(2016, 1, 1), + ... date(2020, 1, 1), + ... "1y", + ... eager=True, + ... ), + ... "gdp": [4164, 4411, 4566, 4696, 4827], ... } - ... ).set_sorted("date") + ... ) + >>> gdp.collect() + shape: (5, 2) + ┌────────────┬──────┐ + │ date ┆ gdp │ + │ --- ┆ --- │ + │ date ┆ i64 │ + ╞════════════╪══════╡ + │ 2016-01-01 ┆ 4164 │ + │ 2017-01-01 ┆ 4411 │ + │ 2018-01-01 ┆ 4566 │ + │ 2019-01-01 ┆ 4696 │ + │ 2020-01-01 ┆ 4827 │ + └────────────┴──────┘ + >>> population = pl.LazyFrame( ... { - ... "date": [ - ... datetime(2016, 5, 12), - ... datetime(2017, 5, 12), - ... datetime(2018, 5, 12), - ... datetime(2019, 5, 12), - ... ], # note record date: May 12th (sorted!) - ... "population": [82.19, 82.66, 83.12, 83.52], + ... "date": [date(2016, 3, 1), date(2018, 8, 1), date(2019, 1, 1)], + ... "population": [82.19, 82.66, 83.12], ... } - ... ).set_sorted("date") + ... ).sort("date") + >>> population.collect() + shape: (3, 2) + ┌────────────┬────────────┐ + │ date ┆ population │ + │ --- ┆ --- │ + │ date ┆ f64 │ + ╞════════════╪════════════╡ + │ 2016-03-01 ┆ 82.19 │ + │ 2018-08-01 ┆ 82.66 │ + │ 2019-01-01 ┆ 83.12 │ + └────────────┴────────────┘ + + Note how the dates don't quite match. 
If we join them using `join_asof` and + `strategy='backward'`, then each date from `population` which doesn't have an + exact match is matched with the closest earlier date from `gdp`: + >>> population.join_asof(gdp, on="date", strategy="backward").collect() - shape: (4, 3) - ┌─────────────────────┬────────────┬──────┐ - │ date ┆ population ┆ gdp │ - │ --- ┆ --- ┆ --- │ - │ datetime[μs] ┆ f64 ┆ i64 │ - ╞═════════════════════╪════════════╪══════╡ - │ 2016-05-12 00:00:00 ┆ 82.19 ┆ 4164 │ - │ 2017-05-12 00:00:00 ┆ 82.66 ┆ 4411 │ - │ 2018-05-12 00:00:00 ┆ 83.12 ┆ 4566 │ - │ 2019-05-12 00:00:00 ┆ 83.52 ┆ 4696 │ - └─────────────────────┴────────────┴──────┘ + shape: (3, 3) + ┌────────────┬────────────┬──────┐ + │ date ┆ population ┆ gdp │ + │ --- ┆ --- ┆ --- │ + │ date ┆ f64 ┆ i64 │ + ╞════════════╪════════════╪══════╡ + │ 2016-03-01 ┆ 82.19 ┆ 4164 │ + │ 2018-08-01 ┆ 82.66 ┆ 4566 │ + │ 2019-01-01 ┆ 83.12 ┆ 4696 │ + └────────────┴────────────┴──────┘ + + Note how: + + - date `2016-03-01` from `population` is matched with `2016-01-01` from `gdp`; + - date `2018-08-01` from `population` is matched with `2018-01-01` from `gdp`. + + You can verify this by passing `coalesce=False`: + + >>> population.join_asof( + ... gdp, on="date", strategy="backward", coalesce=False + ... ).collect() + shape: (3, 4) + ┌────────────┬────────────┬────────────┬──────┐ + │ date ┆ population ┆ date_right ┆ gdp │ + │ --- ┆ --- ┆ --- ┆ --- │ + │ date ┆ f64 ┆ date ┆ i64 │ + ╞════════════╪════════════╪════════════╪══════╡ + │ 2016-03-01 ┆ 82.19 ┆ 2016-01-01 ┆ 4164 │ + │ 2018-08-01 ┆ 82.66 ┆ 2018-01-01 ┆ 4566 │ + │ 2019-01-01 ┆ 83.12 ┆ 2019-01-01 ┆ 4696 │ + └────────────┴────────────┴────────────┴──────┘ + + If we instead use `strategy='forward'`, then each date from `population` which + doesn't have an exact match is matched with the closest later date from `gdp`: + + >>> population.join_asof(gdp, on="date", strategy="forward").collect() + shape: (3, 3) + ┌────────────┬────────────┬──────┐ + │ date ┆ population ┆ gdp │ + │ --- ┆ --- ┆ --- │ + │ date ┆ f64 ┆ i64 │ + ╞════════════╪════════════╪══════╡ + │ 2016-03-01 ┆ 82.19 ┆ 4411 │ + │ 2018-08-01 ┆ 82.66 ┆ 4696 │ + │ 2019-01-01 ┆ 83.12 ┆ 4696 │ + └────────────┴────────────┴──────┘ + + Note how: + + - date `2016-03-01` from `population` is matched with `2017-01-01` from `gdp`; + - date `2018-08-01` from `population` is matched with `2019-01-01` from `gdp`. + + Finally, `strategy='nearest'` gives us a mix of the two results above, as each + date from `population` which doesn't have an exact match is matched with the + closest date from `gdp`, regardless of whether it's earlier or later: + + >>> population.join_asof(gdp, on="date", strategy="nearest").collect() + shape: (3, 3) + ┌────────────┬────────────┬──────┐ + │ date ┆ population ┆ gdp │ + │ --- ┆ --- ┆ --- │ + │ date ┆ f64 ┆ i64 │ + ╞════════════╪════════════╪══════╡ + │ 2016-03-01 ┆ 82.19 ┆ 4164 │ + │ 2018-08-01 ┆ 82.66 ┆ 4696 │ + │ 2019-01-01 ┆ 83.12 ┆ 4696 │ + └────────────┴────────────┴──────┘ + + Note how: + + - date `2016-03-01` from `population` is matched with `2016-01-01` from `gdp`; + - date `2018-08-01` from `population` is matched with `2019-01-01` from `gdp`. + + They `by` argument allows joining on another column first, before the asof join. + In this example we join by `country` first, then asof join by date, as above. + + >>> gdp_dates = pl.date_range( # fmt: skip + ... date(2016, 1, 1), date(2020, 1, 1), "1y", eager=True + ... ) + >>> gdp2 = pl.LazyFrame( + ... { + ... 
"country": ["Germany"] * 5 + ["Netherlands"] * 5, + ... "date": pl.concat([gdp_dates, gdp_dates]), + ... "gdp": [4164, 4411, 4566, 4696, 4827, 784, 833, 914, 910, 909], + ... } + ... ).sort("country", "date") + >>> + >>> gdp2.collect() + shape: (10, 3) + ┌─────────────┬────────────┬──────┐ + │ country ┆ date ┆ gdp │ + │ --- ┆ --- ┆ --- │ + │ str ┆ date ┆ i64 │ + ╞═════════════╪════════════╪══════╡ + │ Germany ┆ 2016-01-01 ┆ 4164 │ + │ Germany ┆ 2017-01-01 ┆ 4411 │ + │ Germany ┆ 2018-01-01 ┆ 4566 │ + │ Germany ┆ 2019-01-01 ┆ 4696 │ + │ Germany ┆ 2020-01-01 ┆ 4827 │ + │ Netherlands ┆ 2016-01-01 ┆ 784 │ + │ Netherlands ┆ 2017-01-01 ┆ 833 │ + │ Netherlands ┆ 2018-01-01 ┆ 914 │ + │ Netherlands ┆ 2019-01-01 ┆ 910 │ + │ Netherlands ┆ 2020-01-01 ┆ 909 │ + └─────────────┴────────────┴──────┘ + >>> pop2 = pl.LazyFrame( + ... { + ... "country": ["Germany"] * 3 + ["Netherlands"] * 3, + ... "date": [ + ... date(2016, 3, 1), + ... date(2018, 8, 1), + ... date(2019, 1, 1), + ... date(2016, 3, 1), + ... date(2018, 8, 1), + ... date(2019, 1, 1), + ... ], + ... "population": [82.19, 82.66, 83.12, 17.11, 17.32, 17.40], + ... } + ... ).sort("country", "date") + >>> + >>> pop2.collect() + shape: (6, 3) + ┌─────────────┬────────────┬────────────┐ + │ country ┆ date ┆ population │ + │ --- ┆ --- ┆ --- │ + │ str ┆ date ┆ f64 │ + ╞═════════════╪════════════╪════════════╡ + │ Germany ┆ 2016-03-01 ┆ 82.19 │ + │ Germany ┆ 2018-08-01 ┆ 82.66 │ + │ Germany ┆ 2019-01-01 ┆ 83.12 │ + │ Netherlands ┆ 2016-03-01 ┆ 17.11 │ + │ Netherlands ┆ 2018-08-01 ┆ 17.32 │ + │ Netherlands ┆ 2019-01-01 ┆ 17.4 │ + └─────────────┴────────────┴────────────┘ + >>> pop2.join_asof(gdp2, by="country", on="date", strategy="nearest").collect() + shape: (6, 4) + ┌─────────────┬────────────┬────────────┬──────┐ + │ country ┆ date ┆ population ┆ gdp │ + │ --- ┆ --- ┆ --- ┆ --- │ + │ str ┆ date ┆ f64 ┆ i64 │ + ╞═════════════╪════════════╪════════════╪══════╡ + │ Germany ┆ 2016-03-01 ┆ 82.19 ┆ 4164 │ + │ Germany ┆ 2018-08-01 ┆ 82.66 ┆ 4696 │ + │ Germany ┆ 2019-01-01 ┆ 83.12 ┆ 4696 │ + │ Netherlands ┆ 2016-03-01 ┆ 17.11 ┆ 784 │ + │ Netherlands ┆ 2018-08-01 ┆ 17.32 ┆ 910 │ + │ Netherlands ┆ 2019-01-01 ┆ 17.4 ┆ 910 │ + └─────────────┴────────────┴────────────┴──────┘ + """ if not isinstance(other, LazyFrame): msg = f"expected `other` join table to be a LazyFrame, not a {type(other).__name__!r}" diff --git a/py-polars/polars/schema.py b/py-polars/polars/schema.py index 718ffec75b93..019d2d2f3ad0 100644 --- a/py-polars/polars/schema.py +++ b/py-polars/polars/schema.py @@ -1,9 +1,13 @@ from __future__ import annotations from collections import OrderedDict -from typing import TYPE_CHECKING, Iterable, Mapping +from collections.abc import Mapping +from typing import TYPE_CHECKING, Iterable + +from polars.datatypes._parse import parse_into_dtype if TYPE_CHECKING: + from polars._typing import PythonDataType from polars.datatypes import DataType BaseSchema = OrderedDict[str, DataType] @@ -49,10 +53,19 @@ class Schema(BaseSchema): def __init__( self, - schema: Mapping[str, DataType] | Iterable[tuple[str, DataType]] | None = None, + schema: ( + Mapping[str, DataType | PythonDataType] + | Iterable[tuple[str, DataType | PythonDataType]] + | None + ) = None, ): - schema = schema or {} - super().__init__(schema) + input = ( + schema.items() if schema and isinstance(schema, Mapping) else (schema or {}) + ) + super().__init__({name: parse_into_dtype(tp) for name, tp in input}) # type: ignore[misc] + + def __setitem__(self, name: str, dtype: DataType | PythonDataType) -> 
None: + super().__setitem__(name, parse_into_dtype(dtype)) # type: ignore[assignment] def names(self) -> list[str]: """Get the column names of the schema.""" @@ -65,3 +78,15 @@ def dtypes(self) -> list[DataType]: def len(self) -> int: """Get the number of columns in the schema.""" return len(self) + + def to_python(self) -> dict[str, type]: + """ + Return Schema as a dictionary of column names and their Python types. + + Examples + -------- + >>> s = pl.Schema({"x": pl.Int8(), "y": pl.String(), "z": pl.Duration("ms")}) + >>> s.to_python() + {'x': , 'y': , 'z': } + """ + return {name: tp.to_python() for name, tp in self.items()} diff --git a/py-polars/polars/selectors.py b/py-polars/polars/selectors.py index a42f346c6562..2e56aa3fb91e 100644 --- a/py-polars/polars/selectors.py +++ b/py-polars/polars/selectors.py @@ -2494,7 +2494,7 @@ def starts_with(*prefix: str) -> SelectorType: def string(*, include_categorical: bool = False) -> SelectorType: """ - Select all String (and, optionally, Categorical) string columns . + Select all String (and, optionally, Categorical) string columns. See Also -------- diff --git a/py-polars/polars/series/datetime.py b/py-polars/polars/series/datetime.py index 8c8bfb32bad8..928c81410a62 100644 --- a/py-polars/polars/series/datetime.py +++ b/py-polars/polars/series/datetime.py @@ -1764,10 +1764,12 @@ def round(self, every: str | dt.timedelta | IntoExprColumn) -> Series: This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. - Each date/datetime in the first half of the interval is mapped to the start of - its bucket. - Each date/datetime in the second half of the interval is mapped to the end of - its bucket. + - Each date/datetime in the first half of the interval + is mapped to the start of its bucket. + - Each date/datetime in the second half of the interval + is mapped to the end of its bucket. + - Half-way points are mapped to the start of their bucket. + Ambiguous results are localized using the DST offset of the original timestamp - for example, rounding `'2022-11-06 01:20:00 CST'` by `'1h'` results in `'2022-11-06 01:00:00 CST'`, whereas rounding `'2022-11-06 01:20:00 CDT'` by diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index b61198fb9deb..901e424641fa 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -1232,7 +1232,31 @@ def __getitem__(self, key: MultiIndexSelector) -> Series: ... def __getitem__( self, key: SingleIndexSelector | MultiIndexSelector ) -> Any | Series: - """Get part of the Series as a new Series or scalar.""" + """ + Get part of the Series as a new Series or scalar. + + Parameters + ---------- + key + Row(s) to select. + + Returns + ------- + Series or scalar, depending on `key`. + + Examples + -------- + >>> s = pl.Series("a", [1, 4, 2]) + >>> s[0] + 1 + >>> s[0:2] + shape: (2,) + Series: 'a' [i64] + [ + 1 + 4 + ] + """ return get_series_item_by_key(self, key) def __setitem__( @@ -2404,7 +2428,7 @@ def hist( If None given, we determine the boundaries based on the data. bin_count If no bins provided, this will be used to determine - the distance of the bins + the distance of the bins. include_breakpoint Include a column that indicates the upper breakpoint. 
include_category @@ -2418,18 +2442,17 @@ def hist( -------- >>> a = pl.Series("a", [1, 3, 8, 8, 2, 1, 3]) >>> a.hist(bin_count=4) - shape: (5, 3) - ┌────────────┬─────────────┬───────┐ - │ breakpoint ┆ category ┆ count │ - │ --- ┆ --- ┆ --- │ - │ f64 ┆ cat ┆ u32 │ - ╞════════════╪═════════════╪═══════╡ - │ 0.0 ┆ (-inf, 0.0] ┆ 0 │ - │ 2.25 ┆ (0.0, 2.25] ┆ 3 │ - │ 4.5 ┆ (2.25, 4.5] ┆ 2 │ - │ 6.75 ┆ (4.5, 6.75] ┆ 0 │ - │ inf ┆ (6.75, inf] ┆ 2 │ - └────────────┴─────────────┴───────┘ + shape: (4, 3) + ┌────────────┬───────────────┬───────┐ + │ breakpoint ┆ category ┆ count │ + │ --- ┆ --- ┆ --- │ + │ f64 ┆ cat ┆ u32 │ + ╞════════════╪═══════════════╪═══════╡ + │ 2.75 ┆ (0.993, 2.75] ┆ 3 │ + │ 4.5 ┆ (2.75, 4.5] ┆ 2 │ + │ 6.25 ┆ (4.5, 6.25] ┆ 0 │ + │ 8.0 ┆ (6.25, 8.0] ┆ 2 │ + └────────────┴───────────────┴───────┘ """ out = ( self.to_frame() diff --git a/py-polars/polars/series/string.py b/py-polars/polars/series/string.py index 30c5ac90b5c1..cb6d9fbb999d 100644 --- a/py-polars/polars/series/string.py +++ b/py-polars/polars/series/string.py @@ -376,7 +376,7 @@ def contains( self, pattern: str | Expr, *, literal: bool = False, strict: bool = True ) -> Series: """ - Check if strings in Series contain a substring that matches a regex. + Check if the string contains a substring that matches a pattern. Parameters ---------- @@ -480,7 +480,7 @@ def find( See Also -------- - contains : Check if string contains a substring that matches a regex. + contains : Check if the string contains a substring that matches a pattern. Examples -------- @@ -535,7 +535,7 @@ def ends_with(self, suffix: str | Expr) -> Series: See Also -------- - contains : Check if string contains a substring that matches a regex. + contains : Check if the string contains a substring that matches a pattern. starts_with : Check if string values start with a substring. Examples @@ -562,7 +562,7 @@ def starts_with(self, prefix: str | Expr) -> Series: See Also -------- - contains : Check if string contains a substring that matches a regex. + contains : Check if the string contains a substring that matches a pattern. ends_with : Check if string values end with a substring. 
Examples diff --git a/py-polars/tests/unit/cloud/test_prepare_cloud_plan.py b/py-polars/tests/unit/cloud/test_prepare_cloud_plan.py index 825c2c130e57..c54d0c7fc7ad 100644 --- a/py-polars/tests/unit/cloud/test_prepare_cloud_plan.py +++ b/py-polars/tests/unit/cloud/test_prepare_cloud_plan.py @@ -9,7 +9,6 @@ from polars.exceptions import InvalidOperationError CLOUD_SOURCE = "s3://my-nonexistent-bucket/dataset" -CLOUD_SINK = "s3://my-nonexistent-bucket/result" @pytest.mark.parametrize( @@ -22,37 +21,20 @@ ], ) def test_prepare_cloud_plan(lf: pl.LazyFrame) -> None: - result = prepare_cloud_plan(lf, CLOUD_SINK) + result = prepare_cloud_plan(lf) assert isinstance(result, bytes) deserialized = pl.LazyFrame.deserialize(BytesIO(result)) assert isinstance(deserialized, pl.LazyFrame) -def test_prepare_cloud_plan_sink_added() -> None: - lf = pl.LazyFrame({"a": [1, 2], "b": [3, 4]}) - - result = prepare_cloud_plan(lf, CLOUD_SINK) - - deserialized = pl.LazyFrame.deserialize(BytesIO(result)) - assert "SINK (cloud)" in deserialized.explain() - - -def test_prepare_cloud_plan_invalid_sink_uri() -> None: - lf = pl.LazyFrame({"a": [1, 2], "b": [3, 4]}) - local_path = "~/local/result.parquet" - - with pytest.raises(InvalidOperationError, match="non-cloud paths not supported"): - prepare_cloud_plan(lf, local_path) - - def test_prepare_cloud_plan_optimization_toggle() -> None: lf = pl.LazyFrame({"a": [1, 2], "b": [3, 4]}) with pytest.raises(TypeError, match="unexpected keyword argument"): - prepare_cloud_plan(lf, CLOUD_SINK, nonexistent_optimization=False) + prepare_cloud_plan(lf, nonexistent_optimization=False) - result = prepare_cloud_plan(lf, CLOUD_SINK, projection_pushdown=False) + result = prepare_cloud_plan(lf, projection_pushdown=False) assert isinstance(result, bytes) # TODO: How to check that this optimization was toggled correctly? @@ -92,7 +74,7 @@ def test_prepare_cloud_plan_fail_on_udf(lf: pl.LazyFrame) -> None: InvalidOperationError, match="logical plan ineligible for execution on Polars Cloud", ): - prepare_cloud_plan(lf, CLOUD_SINK) + prepare_cloud_plan(lf) @pytest.mark.parametrize( @@ -109,7 +91,7 @@ def test_prepare_cloud_plan_fail_on_local_data_source(lf: pl.LazyFrame) -> None: InvalidOperationError, match="logical plan ineligible for execution on Polars Cloud", ): - prepare_cloud_plan(lf, CLOUD_SINK) + prepare_cloud_plan(lf) @pytest.mark.write_disk() @@ -124,4 +106,4 @@ def test_prepare_cloud_plan_fail_on_python_scan(tmp_path: Path) -> None: InvalidOperationError, match="logical plan ineligible for execution on Polars Cloud", ): - prepare_cloud_plan(lf, CLOUD_SINK) + prepare_cloud_plan(lf) diff --git a/py-polars/tests/unit/constructors/test_series.py b/py-polars/tests/unit/constructors/test_series.py index cfc0c76b6dc2..c31a5b48ce68 100644 --- a/py-polars/tests/unit/constructors/test_series.py +++ b/py-polars/tests/unit/constructors/test_series.py @@ -5,6 +5,7 @@ from typing import TYPE_CHECKING, Any import numpy as np +import pandas as pd import pytest import polars as pl @@ -148,6 +149,12 @@ def test_series_init_np_temporal_with_nat_15518() -> None: assert_series_equal(result, expected) +def test_series_init_pandas_timestamp_18127() -> None: + result = pl.Series([pd.Timestamp("2000-01-01T00:00:00.123456789", tz="UTC")]) + # Note: time unit is not (yet) respected, it should be Datetime('ns', 'UTC'). 
+ assert result.dtype == pl.Datetime("us", "UTC") + + def test_series_init_np_2d_zero_zero_shape() -> None: arr = np.array([]).reshape(0, 0) with pytest.raises( diff --git a/py-polars/tests/unit/dataframe/test_upsample.py b/py-polars/tests/unit/dataframe/test_upsample.py index 163245fb1502..21160ad54df8 100644 --- a/py-polars/tests/unit/dataframe/test_upsample.py +++ b/py-polars/tests/unit/dataframe/test_upsample.py @@ -216,3 +216,71 @@ def test_upsample_index_invalid( every="1h", maintain_order=maintain_order, ) + + +def test_upsample_sorted_only_within_group() -> None: + df = pl.DataFrame( + { + "time": [ + datetime(2021, 4, 1), + datetime(2021, 2, 1), + datetime(2021, 5, 1), + datetime(2021, 6, 1), + ], + "admin": ["Netherlands", "Åland", "Åland", "Netherlands"], + "test2": [1, 0, 2, 3], + } + ) + + up = df.upsample( + time_column="time", + every="1mo", + group_by="admin", + maintain_order=True, + ).select(pl.all().forward_fill()) + + expected = pl.DataFrame( + { + "time": [ + datetime(2021, 4, 1, 0, 0), + datetime(2021, 5, 1, 0, 0), + datetime(2021, 6, 1, 0, 0), + datetime(2021, 2, 1, 0, 0), + datetime(2021, 3, 1, 0, 0), + datetime(2021, 4, 1, 0, 0), + datetime(2021, 5, 1, 0, 0), + ], + "admin": [ + "Netherlands", + "Netherlands", + "Netherlands", + "Åland", + "Åland", + "Åland", + "Åland", + ], + "test2": [1, 1, 3, 0, 0, 0, 2], + } + ) + + assert_frame_equal(up, expected) + + +def test_upsample_sorted_only_within_group_but_no_group_by_provided() -> None: + df = pl.DataFrame( + { + "time": [ + datetime(2021, 4, 1), + datetime(2021, 2, 1), + datetime(2021, 5, 1), + datetime(2021, 6, 1), + ], + "admin": ["Netherlands", "Åland", "Åland", "Netherlands"], + "test2": [1, 0, 2, 3], + } + ) + with pytest.raises( + InvalidOperationError, + match=r"argument in operation 'upsample' is not sorted, please sort the 'expr/series/column' first", + ): + df.upsample(time_column="time", every="1mo") diff --git a/py-polars/tests/unit/io/test_lazy_count_star.py b/py-polars/tests/unit/io/test_lazy_count_star.py index 222f0cb25607..a20f4ea75a36 100644 --- a/py-polars/tests/unit/io/test_lazy_count_star.py +++ b/py-polars/tests/unit/io/test_lazy_count_star.py @@ -5,6 +5,7 @@ if TYPE_CHECKING: from pathlib import Path +import gzip from tempfile import NamedTemporaryFile import pytest @@ -82,3 +83,30 @@ def test_count_ndjson(io_files_path: Path, path: str, n_rows: int) -> None: # Check if we are using our fast count star assert "FAST_COUNT" in lf.explain() assert_frame_equal(lf.collect(), expected) + + +def test_count_compressed_csv_18057(io_files_path: Path) -> None: + csv_file = io_files_path / "gzipped.csv.gz" + + expected = pl.DataFrame( + {"a": [1, 2, 3], "b": ["a", "b", "c"], "c": [1.0, 2.0, 3.0]} + ) + lf = pl.scan_csv(csv_file, truncate_ragged_lines=True) + out = lf.collect() + assert_frame_equal(out, expected) + # This also tests: + # #18070 "CSV count_rows does not skip empty lines at file start" + # as the file has an empty line at the beginning. 
+ assert lf.select(pl.len()).collect().item() == 3 + + +def test_count_compressed_ndjson(tmp_path: Path) -> None: + tmp_path.mkdir(exist_ok=True) + path = tmp_path / "data.jsonl.gz" + df = pl.DataFrame({"x": range(5)}) + + with gzip.open(path, "wb") as f: + df.write_ndjson(f) + + lf = pl.scan_ndjson(path) + assert lf.select(pl.len()).collect().item() == 5 diff --git a/py-polars/tests/unit/io/test_lazy_csv.py b/py-polars/tests/unit/io/test_lazy_csv.py index c2351ec109bc..5672c4b1b7c4 100644 --- a/py-polars/tests/unit/io/test_lazy_csv.py +++ b/py-polars/tests/unit/io/test_lazy_csv.py @@ -438,18 +438,3 @@ def test_scan_csv_with_column_names_nonexistent_file() -> None: # Upon collection, it should fail with pytest.raises(FileNotFoundError): result.collect() - - -def test_scan_csv_compressed_row_count_18057(io_files_path: Path) -> None: - csv_file = io_files_path / "gzipped.csv.gz" - - expected = pl.DataFrame( - {"a": [1, 2, 3], "b": ["a", "b", "c"], "c": [1.0, 2.0, 3.0]} - ) - lf = pl.scan_csv(csv_file, truncate_ragged_lines=True) - out = lf.collect() - assert_frame_equal(out, expected) - # This also tests: - # #18070 "CSV count_rows does not skip empty lines at file start" - # as the file has an empty line at the beginning. - assert lf.select(pl.len()).collect().item() == 3 diff --git a/py-polars/tests/unit/io/test_parquet.py b/py-polars/tests/unit/io/test_parquet.py index 2f9c3436c4be..f93090ad8302 100644 --- a/py-polars/tests/unit/io/test_parquet.py +++ b/py-polars/tests/unit/io/test_parquet.py @@ -3,7 +3,7 @@ import io from datetime import datetime, time, timezone from decimal import Decimal -from typing import TYPE_CHECKING, Any, cast +from typing import TYPE_CHECKING, Any, Literal, cast import fsspec import numpy as np @@ -18,7 +18,7 @@ import polars as pl from polars.exceptions import ComputeError from polars.testing import assert_frame_equal, assert_series_equal -from polars.testing.parametric import dataframes +from polars.testing.parametric import column, dataframes if TYPE_CHECKING: from pathlib import Path @@ -1059,8 +1059,8 @@ def test_hybrid_rle() -> None: f = io.BytesIO() df.write_parquet(f) f.seek(0) - for column in pq.ParquetFile(f).metadata.to_dict()["row_groups"][0]["columns"]: - assert "RLE_DICTIONARY" in column["encodings"] + for col in pq.ParquetFile(f).metadata.to_dict()["row_groups"][0]["columns"]: + assert "RLE_DICTIONARY" in col["encodings"] f.seek(0) assert_frame_equal(pl.read_parquet(f), df) @@ -1450,9 +1450,6 @@ def test_null_array_dict_pages_18085() -> None: row_group_size=st.integers(min_value=10, max_value=1000), ) def test_delta_encoding_roundtrip(df: pl.DataFrame, row_group_size: int) -> None: - print(df.schema) - print(df) - f = io.BytesIO() pq.write_table( df.to_arrow(), @@ -1510,3 +1507,226 @@ def test_delta_strings_encoding_roundtrip( f.seek(0) assert_frame_equal(pl.read_parquet(f), df) + + +EQUALITY_OPERATORS = ["__eq__", "__lt__", "__le__", "__gt__", "__ge__"] +BOOLEAN_OPERATORS = ["__or__", "__and__"] + + +@given( + df=dataframes( + min_size=0, max_size=100, min_cols=2, max_cols=5, allowed_dtypes=[pl.Int32] + ), + first_op=st.sampled_from(EQUALITY_OPERATORS), + second_op=st.sampled_from( + [None] + + [ + (booljoin, eq) + for booljoin in BOOLEAN_OPERATORS + for eq in EQUALITY_OPERATORS + ] + ), + l1=st.integers(min_value=0, max_value=1000), + l2=st.integers(min_value=0, max_value=1000), + r1=st.integers(min_value=0, max_value=1000), + r2=st.integers(min_value=0, max_value=1000), +) +@pytest.mark.parametrize("parallel_st", ["auto", "prefiltered"]) 
+@settings(
+    deadline=None,
+    suppress_health_check=[HealthCheck.function_scoped_fixture],
+)
+@pytest.mark.write_disk()
+def test_predicate_filtering(
+    tmp_path: Path,
+    df: pl.DataFrame,
+    first_op: str,
+    second_op: None | tuple[str, str],
+    l1: int,
+    l2: int,
+    r1: int,
+    r2: int,
+    parallel_st: Literal["auto", "prefiltered"],
+) -> None:
+    tmp_path.mkdir(exist_ok=True)
+    f = tmp_path / "test.parquet"
+
+    df.write_parquet(f, row_group_size=5)
+
+    cols = df.columns
+
+    l1s = cols[l1 % len(cols)]
+    l2s = cols[l2 % len(cols)]
+    expr = (getattr(pl.col(l1s), first_op))(pl.col(l2s))
+
+    if second_op is not None:
+        r1s = cols[r1 % len(cols)]
+        r2s = cols[r2 % len(cols)]
+        expr = getattr(expr, second_op[0])(
+            (getattr(pl.col(r1s), second_op[1]))(pl.col(r2s))
+        )
+
+    result = pl.scan_parquet(f, parallel=parallel_st).filter(expr).collect()
+    assert_frame_equal(result, df.filter(expr))
+
+
+@given(
+    df=dataframes(
+        min_size=1,
+        max_size=5,
+        min_cols=1,
+        max_cols=1,
+        excluded_dtypes=[pl.Decimal, pl.Categorical, pl.Enum],
+    ),
+    offset=st.integers(0, 100),
+    length=st.integers(0, 100),
+)
+@settings(
+    suppress_health_check=[HealthCheck.function_scoped_fixture],
+)
+@pytest.mark.write_disk()
+def test_slice_roundtrip(
+    df: pl.DataFrame, offset: int, length: int, tmp_path: Path
+) -> None:
+    tmp_path.mkdir(exist_ok=True)
+    f = tmp_path / "test.parquet"
+
+    offset %= df.height + 1
+    length %= df.height - offset + 1
+
+    df.write_parquet(f)
+
+    scanned = pl.scan_parquet(f).slice(offset, length).collect()
+    assert_frame_equal(scanned, df.slice(offset, length))
+
+
+@pytest.mark.write_disk()
+def test_struct_prefiltered(tmp_path: Path) -> None:
+    tmp_path.mkdir(exist_ok=True)
+    f = tmp_path / "test.parquet"
+
+    df = pl.DataFrame({"a": {"x": 1, "y": 2}})
+    df.write_parquet(f)
+
+    (
+        pl.scan_parquet(f, parallel="prefiltered")
+        .filter(pl.col("a").struct.field("x") == 1)
+        .collect()
+    )
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        (
+            [{"x": ""}, {"x": "0"}],
+            pa.struct([pa.field("x", pa.string(), nullable=True)]),
+        ),
+        (
+            [{"x": ""}, {"x": "0"}],
+            pa.struct([pa.field("x", pa.string(), nullable=False)]),
+        ),
+        ([[""], ["0"]], pa.list_(pa.field("item", pa.string(), nullable=False))),
+        ([[""], ["0"]], pa.list_(pa.field("item", pa.string(), nullable=True))),
+        ([[""], ["0"]], pa.list_(pa.field("item", pa.string(), nullable=False), 1)),
+        ([[""], ["0"]], pa.list_(pa.field("item", pa.string(), nullable=True), 1)),
+        (
+            [["", "1"], ["0", "2"]],
+            pa.list_(pa.field("item", pa.string(), nullable=False), 2),
+        ),
+        (
+            [["", "1"], ["0", "2"]],
+            pa.list_(pa.field("item", pa.string(), nullable=True), 2),
+        ),
+    ],
+)
+@pytest.mark.parametrize("nullable", [False, True])
+@pytest.mark.write_disk()
+def test_nested_skip_18303(
+    data: tuple[list[dict[str, str] | list[str]], pa.DataType],
+    nullable: bool,
+    tmp_path: Path,
+) -> None:
+    tmp_path.mkdir(exist_ok=True)
+    f = tmp_path / "test.parquet"
+
+    schema = pa.schema([pa.field("a", data[1], nullable=nullable)])
+    tb = pa.table({"a": data[0]}, schema=schema)
+    pq.write_table(tb, f)
+
+    scanned = pl.scan_parquet(f).slice(1, 1).collect()
+
+    assert_frame_equal(scanned, pl.DataFrame(tb).slice(1, 1))
+
+
+@given(
+    df=dataframes(
+        min_size=0,
+        max_size=100,
+        min_cols=2,
+        max_cols=5,
+        allowed_dtypes=[pl.String, pl.Binary],
+        include_cols=[
+            column("filter_col", pl.Int8, st.integers(0, 1), allow_null=False)
+        ],
+    ),
+)
+@settings(
+    deadline=None,
+    suppress_health_check=[HealthCheck.function_scoped_fixture],
+)
+@pytest.mark.write_disk()
+def test_delta_length_byte_array_prefiltering(
+    tmp_path: Path,
+    df: pl.DataFrame,
+) -> None:
+    tmp_path.mkdir(exist_ok=True)
+    f = tmp_path / "test.parquet"
+
+    cols = df.columns
+
+    encodings = {col: "DELTA_LENGTH_BYTE_ARRAY" for col in cols}
+    encodings["filter_col"] = "PLAIN"
+
+    pq.write_table(
+        df.to_arrow(),
+        f,
+        use_dictionary=False,
+        column_encoding=encodings,
+    )
+
+    expr = pl.col("filter_col") == 0
+    result = pl.scan_parquet(f, parallel="prefiltered").filter(expr).collect()
+    assert_frame_equal(result, df.filter(expr))
+
+
+@given(
+    df=dataframes(
+        min_size=0,
+        max_size=10,
+        min_cols=1,
+        max_cols=5,
+        excluded_dtypes=[pl.Decimal, pl.Categorical, pl.Enum],
+        include_cols=[
+            column("filter_col", pl.Int8, st.integers(0, 1), allow_null=False)
+        ],
+    ),
+)
+@settings(
+    deadline=None,
+    suppress_health_check=[HealthCheck.function_scoped_fixture],
+)
+@pytest.mark.write_disk()
+def test_general_prefiltering(
+    tmp_path: Path,
+    df: pl.DataFrame,
+) -> None:
+    tmp_path.mkdir(exist_ok=True)
+    f = tmp_path / "test.parquet"
+
+    df.write_parquet(f)
+
+    expr = pl.col("filter_col") == 0
+
+    result = pl.scan_parquet(f, parallel="prefiltered").filter(expr).collect()
+    assert_frame_equal(result, df.filter(expr))
diff --git a/py-polars/tests/unit/io/test_scan.py b/py-polars/tests/unit/io/test_scan.py
index a1094ec778f0..8476b8b5f666 100644
--- a/py-polars/tests/unit/io/test_scan.py
+++ b/py-polars/tests/unit/io/test_scan.py
@@ -640,18 +640,14 @@ def test_scan_include_file_name(
     streaming: bool,
 ) -> None:
     tmp_path.mkdir(exist_ok=True)
-    paths: list[Path] = []
     dfs: list[pl.DataFrame] = []
 
     for x in ["1", "2"]:
-        paths.append(Path(f"{tmp_path}/{x}.bin").absolute())
-        dfs.append(pl.DataFrame({"x": x}))
-        write_func(dfs[-1], paths[-1])
-
-    df = pl.concat(dfs).with_columns(
-        pl.Series("path", map(str, paths), dtype=pl.String)
-    )
+        path = Path(f"{tmp_path}/{x}.bin").absolute()
+        dfs.append(pl.DataFrame({"x": 10 * [x]}).with_columns(path=pl.lit(str(path))))
+        write_func(dfs[-1].drop("path"), path)
 
+    df = pl.concat(dfs)
     assert df.columns == ["x", "path"]
 
     with pytest.raises(
diff --git a/py-polars/tests/unit/lazyframe/test_serde.py b/py-polars/tests/unit/lazyframe/test_serde.py
index 515ce490693e..86a5c932b7f5 100644
--- a/py-polars/tests/unit/lazyframe/test_serde.py
+++ b/py-polars/tests/unit/lazyframe/test_serde.py
@@ -40,6 +40,7 @@ def test_lf_serde_roundtrip_binary(lf: pl.LazyFrame) -> None:
         ],
     )
 )
+@pytest.mark.filterwarnings("ignore")
 def test_lf_serde_roundtrip_json(lf: pl.LazyFrame) -> None:
     serialized = lf.serialize(format="json")
     result = pl.LazyFrame.deserialize(io.StringIO(serialized), format="json")
@@ -52,13 +53,7 @@ def lf() -> pl.LazyFrame:
     return pl.LazyFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}).select("a").sum()
 
 
-def test_lf_serde(lf: pl.LazyFrame) -> None:
-    serialized = lf.serialize()
-    assert isinstance(serialized, bytes)
-    result = pl.LazyFrame.deserialize(io.BytesIO(serialized))
-    assert_frame_equal(result, lf)
-
-
+@pytest.mark.filterwarnings("ignore")
 def test_lf_serde_json_stringio(lf: pl.LazyFrame) -> None:
     serialized = lf.serialize(format="json")
     assert isinstance(serialized, str)
@@ -66,6 +61,13 @@ def test_lf_serde_json_stringio(lf: pl.LazyFrame) -> None:
     assert_frame_equal(result, lf)
 
 
+def test_lf_serde(lf: pl.LazyFrame) -> None:
+    serialized = lf.serialize()
+    assert isinstance(serialized, bytes)
+    result = pl.LazyFrame.deserialize(io.BytesIO(serialized))
+    assert_frame_equal(result, lf)
+
+
 @pytest.mark.parametrize(
     ("format", "buf"),
     [
@@ -74,6 +76,7 @@ def test_lf_serde_json_stringio(lf: pl.LazyFrame) -> None:
         ("json", io.BytesIO()),
     ],
 )
+@pytest.mark.filterwarnings("ignore")
 def test_lf_serde_to_from_buffer(
     lf: pl.LazyFrame, format: SerializationFormat, buf: io.IOBase
 ) -> None:
diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py b/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py
index b212c7be133d..cd4ee34bde71 100644
--- a/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py
+++ b/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py
@@ -1427,3 +1427,42 @@ def test_literal_from_datetime(
 
     assert out.schema == OrderedDict({"literal": dtype})
     assert out.item() == value
+
+
+@pytest.mark.parametrize(
+    "value",
+    [
+        time(0),
+        time(hour=1),
+        time(hour=16, minute=43, microsecond=500),
+        time(hour=23, minute=59, second=59, microsecond=999999),
+    ],
+)
+def test_literal_from_time(value: time) -> None:
+    out = pl.select(pl.lit(value))
+    assert out.schema == OrderedDict({"literal": pl.Time})
+    assert out.item() == value
+
+
+@pytest.mark.parametrize(
+    "dtype",
+    [
+        None,
+        pl.Duration("ms"),
+        pl.Duration("us"),
+        pl.Duration("ns"),
+    ],
+)
+@pytest.mark.parametrize(
+    "value",
+    [
+        timedelta(0),
+        timedelta(hours=1),
+        timedelta(days=-99999),
+        timedelta(days=99999),
+    ],
+)
+def test_literal_from_timedelta(value: time, dtype: pl.Duration | None) -> None:
+    out = pl.select(pl.lit(value, dtype=dtype))
+    assert out.schema == OrderedDict({"literal": dtype or pl.Duration("us")})
+    assert out.item() == value
diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_round.py b/py-polars/tests/unit/operations/namespaces/temporal/test_round.py
index 1ac7acc3edcd..49ed4328b8f0 100644
--- a/py-polars/tests/unit/operations/namespaces/temporal/test_round.py
+++ b/py-polars/tests/unit/operations/namespaces/temporal/test_round.py
@@ -189,3 +189,51 @@ def test_round_datetime_w_expression(time_unit: TimeUnit) -> None:
     result = df.select(pl.col("a").dt.round(pl.col("b")))["a"]
     assert result[0] == datetime(2020, 1, 1)
     assert result[1] == datetime(2020, 1, 21)
+
+
+@pytest.mark.parametrize(
+    ("time_unit", "expected"),
+    [
+        ("ms", 0),
+        ("us", 0),
+        ("ns", 0),
+    ],
+)
+def test_round_negative_towards_epoch_18239(time_unit: TimeUnit, expected: int) -> None:
+    s = pl.Series([datetime(1970, 1, 1)], dtype=pl.Datetime(time_unit))
+    s = s.dt.offset_by(f"-1{time_unit}")
+    result = s.dt.round(f"2{time_unit}").dt.timestamp(time_unit="ns").item()
+    assert result == expected
+    result = (
+        s.dt.replace_time_zone("Europe/London")
+        .dt.round(f"2{time_unit}")
+        .dt.replace_time_zone(None)
+        .dt.timestamp(time_unit="ns")
+        .item()
+    )
+    assert result == expected
+
+
+@pytest.mark.parametrize(
+    ("time_unit", "expected"),
+    [
+        ("ms", 2_000_000),
+        ("us", 2_000),
+        ("ns", 2),
+    ],
+)
+def test_round_positive_away_from_epoch_18239(
+    time_unit: TimeUnit, expected: int
+) -> None:
+    s = pl.Series([datetime(1970, 1, 1)], dtype=pl.Datetime(time_unit))
+    s = s.dt.offset_by(f"1{time_unit}")
+    result = s.dt.round(f"2{time_unit}").dt.timestamp(time_unit="ns").item()
+    assert result == expected
+    result = (
+        s.dt.replace_time_zone("Europe/London")
+        .dt.round(f"2{time_unit}")
+        .dt.replace_time_zone(None)
+        .dt.timestamp(time_unit="ns")
+        .item()
+    )
+    assert result == expected
diff --git a/py-polars/tests/unit/operations/rolling/test_rolling.py b/py-polars/tests/unit/operations/rolling/test_rolling.py
index 8e5bbfd69bd1..d934683d645f 100644
--- a/py-polars/tests/unit/operations/rolling/test_rolling.py
+++ b/py-polars/tests/unit/operations/rolling/test_rolling.py
@@ -589,6 +589,44 @@ def test_rolling_cov_corr() -> None:
     assert res["corr"][:2] == [None] * 2
 
 
+def test_rolling_cov_corr_nulls() -> None:
+    df1 = pl.DataFrame(
+        {"a": [1.06, 1.07, 0.93, 0.78, 0.85], "lag_a": [1.0, 1.06, 1.07, 0.93, 0.78]}
+    )
+    df2 = pl.DataFrame(
+        {
+            "a": [1.0, 1.06, 1.07, 0.93, 0.78, 0.85],
+            "lag_a": [None, 1.0, 1.06, 1.07, 0.93, 0.78],
+        }
+    )
+
+    val_1 = df1.select(
+        pl.rolling_corr("a", "lag_a", window_size=10, min_periods=5, ddof=1)
+    )
+    val_2 = df2.select(
+        pl.rolling_corr("a", "lag_a", window_size=10, min_periods=5, ddof=1)
+    )
+
+    df1_expected = pl.DataFrame({"a": [None, None, None, None, 0.62204709]})
+    df2_expected = pl.DataFrame({"a": [None, None, None, None, None, 0.62204709]})
+
+    assert_frame_equal(val_1, df1_expected, atol=0.0000001)
+    assert_frame_equal(val_2, df2_expected, atol=0.0000001)
+
+    val_1 = df1.select(
+        pl.rolling_cov("a", "lag_a", window_size=10, min_periods=5, ddof=1)
+    )
+    val_2 = df2.select(
+        pl.rolling_cov("a", "lag_a", window_size=10, min_periods=5, ddof=1)
+    )
+
+    df1_expected = pl.DataFrame({"a": [None, None, None, None, 0.009445]})
+    df2_expected = pl.DataFrame({"a": [None, None, None, None, None, 0.009445]})
+
+    assert_frame_equal(val_1, df1_expected, atol=0.0000001)
+    assert_frame_equal(val_2, df2_expected, atol=0.0000001)
+
+
 @pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
 def test_rolling_empty_window_9406(time_unit: TimeUnit) -> None:
     datecol = pl.Series(
diff --git a/py-polars/tests/unit/operations/test_filter.py b/py-polars/tests/unit/operations/test_filter.py
index df796b44b991..eed550fac516 100644
--- a/py-polars/tests/unit/operations/test_filter.py
+++ b/py-polars/tests/unit/operations/test_filter.py
@@ -285,3 +285,16 @@ def test_filter_group_aware_17030() -> None:
         (group_count > 2) & (group_cum_count > 1) & (group_cum_count < group_count)
     )
     assert df.filter(filter_expr)["foo"].to_list() == ["1", "2"]
+
+
+def test_invalid_filter_18295() -> None:
+    codes = ["a"] * 5 + ["b"] * 5
+    values = list(range(-2, 3)) + list(range(2, -3, -1))
+    df = pl.DataFrame({"code": codes, "value": values})
+    with pytest.raises(pl.exceptions.ShapeError):
+        df.group_by("code").agg(
+            pl.col("value")
+            .ewm_mean(span=2, ignore_nulls=True)
+            .tail(3)
+            .filter(pl.col("value") > 0),
+        ).sort("code")
diff --git a/py-polars/tests/unit/operations/test_slice.py b/py-polars/tests/unit/operations/test_slice.py
index 7c8fb22665c1..692fcb5634dc 100644
--- a/py-polars/tests/unit/operations/test_slice.py
+++ b/py-polars/tests/unit/operations/test_slice.py
@@ -243,3 +243,33 @@ def test_double_sort_slice_pushdown_15779() -> None:
     assert (
         pl.LazyFrame({"foo": [1, 2]}).sort("foo").head(0).sort("foo").collect()
     ).shape == (0, 1)
+
+
+def test_slice_pushdown_simple_projection_18288() -> None:
+    lf = pl.DataFrame({"col": ["0", "notanumber"]}).lazy()
+    lf = lf.with_columns([pl.col("col").cast(pl.Int64)])
+    lf = lf.with_columns([pl.col("col"), pl.lit(None)])
+    assert lf.head(1).collect().to_dict(as_series=False) == {
+        "col": [0],
+        "literal": [None],
+    }
+
+
+def test_group_by_slice_all_keys() -> None:
+    df = pl.DataFrame(
+        {
+            "a": ["Tom", "Nick", "Marry", "Krish", "Jack", None],
+            "b": [
+                "2020-01-01",
+                "2020-01-02",
+                "2020-01-03",
+                "2020-01-04",
+                "2020-01-05",
+                None,
+            ],
+            "c": [5, 6, 6, 7, 8, 5],
+        }
+    )
+
+    gb = df.group_by(["a", "b", "c"], maintain_order=True)
+    assert_frame_equal(gb.tail(1), gb.head(1))
diff --git a/py-polars/tests/unit/operations/test_statistics.py b/py-polars/tests/unit/operations/test_statistics.py
index 8aa1b0ae6811..ed8b964582cb 100644
--- a/py-polars/tests/unit/operations/test_statistics.py
+++ b/py-polars/tests/unit/operations/test_statistics.py
@@ -7,6 +7,7 @@
 import pytest
 
 import polars as pl
+from polars import StringCache
 from polars.testing import assert_frame_equal
 
 
@@ -37,16 +38,21 @@ def test_corr_nan() -> None:
     assert str(df.select(pl.corr("a", "b", ddof=1))[0, 0]) == "nan"
 
 
+@StringCache()
 def test_hist() -> None:
-    a = pl.Series("a", [1, 3, 8, 8, 2, 1, 3])
-    assert (
-        str(a.hist(bin_count=4).to_dict(as_series=False))
-        == "{'breakpoint': [0.0, 2.25, 4.5, 6.75, inf], 'category': ['(-inf, 0.0]', '(0.0, 2.25]', '(2.25, 4.5]', '(4.5, 6.75]', '(6.75, inf]'], 'count': [0, 3, 2, 0, 2]}"
+    s = pl.Series("a", [1, 3, 8, 8, 2, 1, 3])
+    out = s.hist(bin_count=4)
+    expected = pl.DataFrame(
+        {
+            "breakpoint": pl.Series([2.75, 4.5, 6.25, 8.0], dtype=pl.Float64),
+            "category": pl.Series(
+                ["(0.993, 2.75]", "(2.75, 4.5]", "(4.5, 6.25]", "(6.25, 8.0]"],
+                dtype=pl.Categorical,
+            ),
+            "count": pl.Series([3, 2, 0, 2], dtype=pl.get_index_type()),
+        }
     )
-
-    assert a.hist(
-        bins=[0, 2], include_category=False, include_breakpoint=False
-    ).to_series().to_list() == [0, 3, 4]
+    assert_frame_equal(out, expected, categorical_as_str=True)
 
 
 @pytest.mark.parametrize("values", [[], [None]])
diff --git a/py-polars/tests/unit/sql/test_set_ops.py b/py-polars/tests/unit/sql/test_set_ops.py
index 64508887d1c5..f148d561c31b 100644
--- a/py-polars/tests/unit/sql/test_set_ops.py
+++ b/py-polars/tests/unit/sql/test_set_ops.py
@@ -69,6 +69,26 @@ def test_except_intersect_by_name() -> None:
     assert res_i.columns == ["x", "y", "z"]
 
 
+@pytest.mark.parametrize(
+    ("op", "op_subtype"),
+    [
+        ("EXCEPT", "ALL"),
+        ("EXCEPT", "ALL BY NAME"),
+        ("INTERSECT", "ALL"),
+        ("INTERSECT", "ALL BY NAME"),
+    ],
+)
+def test_except_intersect_all_unsupported(op: str, op_subtype: str) -> None:
+    df1 = pl.DataFrame({"n": [1, 1, 1, 2, 2, 2, 3]})  # noqa: F841
+    df2 = pl.DataFrame({"n": [1, 1, 2, 2]})  # noqa: F841
+
+    with pytest.raises(
+        SQLInterfaceError,
+        match=f"'{op} {op_subtype}' is not supported",
+    ):
+        pl.sql(f"SELECT * FROM df1 {op} {op_subtype} SELECT * FROM df2")
+
+
 @pytest.mark.parametrize("op", ["EXCEPT", "INTERSECT", "UNION"])
 def test_except_intersect_errors(op: str) -> None:
     df1 = pl.DataFrame({"x": [1, 9, 1, 1], "y": [2, 3, 4, 4], "z": [5, 5, 5, 5]})  # noqa: F841
diff --git a/py-polars/tests/unit/test_errors.py b/py-polars/tests/unit/test_errors.py
index 244555715657..2087387b1a8a 100644
--- a/py-polars/tests/unit/test_errors.py
+++ b/py-polars/tests/unit/test_errors.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import io
-from datetime import date, datetime, time
+from datetime import date, datetime, time, tzinfo
 from decimal import Decimal
 from typing import TYPE_CHECKING, Any
 
@@ -326,10 +326,16 @@ def test_datetime_time_add_err() -> None:
 def test_invalid_dtype() -> None:
     with pytest.raises(
         TypeError,
-        match="cannot parse input of type 'str' into Polars data type: 'mayonnaise'",
+        match=r"cannot parse input of type 'str' into Polars data type \(given: 'mayonnaise'\)",
     ):
         pl.Series([1, 2], dtype="mayonnaise")  # type: ignore[arg-type]
 
+    with pytest.raises(
+        TypeError,
+        match="cannot parse input into Polars data type",
+    ):
+        pl.Series([None], dtype=tzinfo)  # type: ignore[arg-type]
+
 
 def test_arr_eval_named_cols() -> None:
     df = pl.DataFrame({"A": ["a", "b"], "B": [["a", "b"], ["c", "d"]]})
@@ -484,7 +490,7 @@ def test_skip_nulls_err() -> None:
 
     with pytest.raises(
         ComputeError,
-        match=r"The output type of the 'apply' function cannot be determined",
+        match=r"The output type of the 'map_elements' function cannot be determined",
     ):
         df.with_columns(pl.col("foo").map_elements(lambda x: x, skip_nulls=True))
 
diff --git a/py-polars/tests/unit/test_schema.py b/py-polars/tests/unit/test_schema.py
index 8ccb4497ac0f..4c09c986eeb9 100644
--- a/py-polars/tests/unit/test_schema.py
+++ b/py-polars/tests/unit/test_schema.py
@@ -1,4 +1,5 @@
 import pickle
+from datetime import datetime
 
 import polars as pl
 
@@ -14,20 +15,29 @@ def test_schema() -> None:
 
 
 def test_schema_parse_nonpolars_dtypes() -> None:
-    # Currently, no parsing is being done.
-    s = pl.Schema({"foo": pl.List, "bar": int})  # type: ignore[arg-type]
+    cardinal_directions = pl.Enum(["north", "south", "east", "west"])
+
+    s = pl.Schema({"foo": pl.List, "bar": int, "baz": cardinal_directions})  # type: ignore[arg-type]
+    s["ham"] = datetime
 
     assert s["foo"] == pl.List
-    assert s["bar"] is int
-    assert s.len() == 2
-    assert s.names() == ["foo", "bar"]
-    assert s.dtypes() == [pl.List, int]
+    assert s["bar"] == pl.Int64
+    assert s["baz"] == cardinal_directions
+    assert s["ham"] == pl.Datetime("us")
+
+    assert s.len() == 4
+    assert s.names() == ["foo", "bar", "baz", "ham"]
+    assert s.dtypes() == [pl.List, pl.Int64, cardinal_directions, pl.Datetime("us")]
+
+    assert list(s.to_python().values()) == [list, int, str, datetime]
+    assert [tp.to_python() for tp in s.dtypes()] == [list, int, str, datetime]
 
 
 def test_schema_equality() -> None:
     s1 = pl.Schema({"foo": pl.Int8(), "bar": pl.Float64()})
     s2 = pl.Schema({"foo": pl.Int8(), "bar": pl.String()})
     s3 = pl.Schema({"bar": pl.Float64(), "foo": pl.Int8()})
+
     assert s1 == s1
     assert s2 == s2
     assert s3 == s3
@@ -37,14 +47,38 @@ def test_schema_equality() -> None:
 
 
 def test_schema_picklable() -> None:
-    s = pl.Schema({"foo": pl.Int8(), "bar": pl.String()})
-
+    s = pl.Schema(
+        {
+            "foo": pl.Int8(),
+            "bar": pl.String(),
+            "ham": pl.Struct({"x": pl.List(pl.Date)}),
+        }
+    )
     pickled = pickle.dumps(s)
     s2 = pickle.loads(pickled)
-
     assert s == s2
 
 
+def test_schema_python() -> None:
+    input = {
+        "foo": pl.Int8(),
+        "bar": pl.String(),
+        "baz": pl.Categorical("lexical"),
+        "ham": pl.Object(),
+        "spam": pl.Struct({"time": pl.List(pl.Duration), "dist": pl.Float64}),
+    }
+    expected = {
+        "foo": int,
+        "bar": str,
+        "baz": str,
+        "ham": object,
+        "spam": dict,
+    }
+    for schema in (input, input.items(), list(input.items())):
+        s = pl.Schema(schema)
+        assert expected == s.to_python()
+
+
 def test_schema_in_map_elements_returns_scalar() -> None:
     schema = pl.Schema([("portfolio", pl.String()), ("irr", pl.Float64())])
 
@@ -62,6 +96,11 @@ def test_schema_in_map_elements_returns_scalar() -> None:
         )
         .alias("irr")
     )
-
     assert (q.collect_schema()) == schema
     assert q.collect().schema == schema
+
+
+def test_ir_cache_unique_18198() -> None:
+    lf = pl.LazyFrame({"a": [1]})
+    lf.collect_schema()
+    assert pl.concat([lf, lf]).collect().to_dict(as_series=False) == {"a": [1, 1]}
diff --git a/rust-toolchain.toml b/rust-toolchain.toml
index a6e580b10775..ef33a7d39711 100644
--- a/rust-toolchain.toml
+++ b/rust-toolchain.toml
@@ -1,2 +1,2 @@
 [toolchain]
-channel = "nightly-2024-07-26"
+channel = "nightly-2024-08-26"