diff --git a/Cargo.lock b/Cargo.lock index fda528e9c5..c63e8ec3e2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -144,11 +144,12 @@ dependencies = [ [[package]] name = "anstyle-wincon" -version = "3.0.6" +version = "3.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" +checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e" dependencies = [ "anstyle", + "once_cell", "windows-sys 0.59.0", ] @@ -312,7 +313,7 @@ dependencies = [ "simdutf8", "streaming-iterator", "strength_reduce", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-util", "zstd 0.12.4", @@ -380,7 +381,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] @@ -402,18 +403,18 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] name = "async-trait" -version = "0.1.85" +version = "0.1.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f934833b4b7233644e5848f235df3f57ed8c80f1528a26c3dfa13d2147fa056" +checksum = "644dd749086bf3771a2fbc5f256fdb982d53f011c7d5d560304eafeecebce79d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] @@ -463,7 +464,7 @@ dependencies = [ "snap 0.2.5", "strum 0.18.0", "strum_macros 0.18.0", - "thiserror", + "thiserror 1.0.69", "typed-builder 0.5.1", "uuid 0.8.2", "zerocopy 0.3.2", @@ -915,7 +916,7 @@ dependencies = [ "serde_json", "time", "url", - "uuid 1.11.0", + "uuid 1.13.1", ] [[package]] @@ -936,7 +937,7 @@ dependencies = [ "time", "tz-rs", "url", - "uuid 1.11.0", + "uuid 1.13.1", ] [[package]] @@ -958,7 +959,7 @@ dependencies = [ "sha2", "time", "url", - "uuid 1.11.0", + "uuid 1.13.1", ] [[package]] @@ -978,7 +979,7 @@ dependencies = [ "serde_json", "time", "url", - "uuid 1.11.0", + "uuid 1.13.1", ] [[package]] @@ -1077,9 +1078,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.6.0" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" [[package]] name = "block-buffer" @@ -1118,7 +1119,7 @@ checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", - "brotli-decompressor 4.0.1", + "brotli-decompressor 4.0.2", ] [[package]] @@ -1133,9 +1134,9 @@ dependencies = [ [[package]] name = "brotli-decompressor" -version = "4.0.1" +version = "4.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a45bd2e4095a8b518033b128020dd4a55aab1c0a381ba4404a472630f4bc362" +checksum = "74fa05ad7d803d413eb8380983b092cbbaf9a85f151b871360e7b00cd7060b37" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -1160,9 +1161,9 @@ checksum = "b4ae4235e6dac0694637c763029ecea1a2ec9e4e06ec2729bd21ba4d9c863eb7" [[package]] name = "bumpalo" -version = "3.16.0" +version = "3.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" +checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" [[package]] name = "bytemuck" @@ -1181,7 +1182,7 @@ checksum = "3fa76293b4f7bb636ab88fd78228235b5248b4d05cc589aed610f954af5d7c7a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] @@ -1198,9 +1199,9 @@ checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" [[package]] name = "bytes" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b" +checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9" [[package]] name = "bytes-utils" @@ -1224,9 +1225,9 @@ dependencies = [ [[package]] name = "bzip2-sys" -version = "0.1.11+1.0.8" +version = "0.1.12+1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +checksum = "72ebc2f1a417f01e1da30ef264ee86ae31d2dcd2d603ea283d3c244a883ca2a9" dependencies = [ "cc", "libc", @@ -1239,7 +1240,7 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e779867f62d81627d1438e0d3fb6ed7d7c9d64293ca6d87a1e88781b94ece1c" dependencies = [ - "syn 2.0.95", + "syn 2.0.98", ] [[package]] @@ -1250,9 +1251,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.7" +version = "1.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a012a0df96dd6d06ba9a1b29d6402d1a5d77c6befd2566afdc26e10603dc93d7" +checksum = "0c3d1b2e905a3a7b00a6141adb0e4c0bb941d11caf55349d863942a1cc44e3c9" dependencies = [ "jobserver", "libc", @@ -1282,9 +1283,9 @@ dependencies = [ [[package]] name = "chrono-tz" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd6dd8046d00723a59a2f8c5f295c515b9bb9a331ee4f8f3d4dd49e428acd3b6" +checksum = "9c6ac4f2c0bf0f44e9161aec9675e1050aa4a530663c4a9e37e108fa948bca9f" dependencies = [ "chrono", "chrono-tz-build", @@ -1351,9 +1352,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.24" +version = "4.5.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9560b07a799281c7e0958b9296854d6fafd4c5f31444a7e5bb1ad6dde5ccf1bd" +checksum = "8acebd8ad879283633b343856142139f2da2317c96b05b4dd6181c61e2480184" dependencies = [ "clap_builder", "clap_derive", @@ -1361,9 +1362,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.24" +version = "4.5.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "874e0dd3eb68bf99058751ac9712f622e61e6f393a94f7128fa26e3f02f5c7cd" +checksum = "f6ba32cbda51c7e1dfd49acc1457ba1a7dec5b64fe360e828acb13ca8dc9c2f9" dependencies = [ "anstream", "anstyle", @@ -1373,14 +1374,14 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.24" +version = "4.5.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54b755194d6389280185988721fffba69495eed5ee9feeee9a599b53db80318c" +checksum = "bf4ced95c6f4a675af3da73304b9ac4ed991640c36374e4b46795c49e17cf1ed" dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] @@ -1400,9 +1401,9 @@ checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" [[package]] name = "cmake" -version = "0.1.52" +version = "0.1.54" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c682c223677e0e5b6b7f63a64b9351844c3f1b1678a68b7ee617e30fb082620e" +checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0" dependencies = [ "cc", ] @@ -1441,13 +1442,12 @@ dependencies = [ [[package]] name = "comfy-table" -version = "7.1.3" +version = "7.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24f165e7b643266ea80cb858aed492ad9280e3e05ce24d4a99d7d7b889b6a4d9" +checksum = "4a65ebfec4fb190b6f90e944a817d60499ee0744e582530e2c9900a22e591d9a" dependencies = [ "crossterm", - "strum 0.26.3", - "strum_macros 0.26.4", + "unicode-segmentation", "unicode-width 0.2.0", ] @@ -1474,8 +1474,8 @@ dependencies = [ name = "common-display" version = "0.3.0-dev0" dependencies = [ - "comfy-table 7.1.3", - "indexmap 2.7.0", + "comfy-table 7.1.4", + "indexmap 2.7.1", "pyo3", "terminal_size", "textwrap", @@ -1490,7 +1490,7 @@ dependencies = [ "pyo3", "regex", "serde_json", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -1694,9 +1694,9 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "cpufeatures" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16b80225097f2e5ae4e7179dd2266824648f3e2f49d9134d584b76389d31c4c3" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" dependencies = [ "libc", ] @@ -1819,7 +1819,7 @@ version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.8.0", "crossterm_winapi", "parking_lot 0.12.3", "rustix", @@ -1837,9 +1837,9 @@ dependencies = [ [[package]] name = "crunchy" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" +checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" [[package]] name = "crypto-common" @@ -1890,9 +1890,9 @@ dependencies = [ [[package]] name = "csv-core" -version = "0.1.11" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d" dependencies = [ "memchr", ] @@ -1904,7 +1904,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a2785755761f3ddc1492979ce1e48d2c00d09311c39e4466429188f3dd6501" dependencies = [ "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] @@ -1924,7 +1924,6 @@ dependencies = [ "common-version", "daft-algebra", "daft-catalog", - "daft-catalog-python-catalog", "daft-compression", "daft-connect", "daft-context", @@ -1970,7 +1969,7 @@ dependencies = [ "common-treenode", "daft-dsl", "daft-schema", - "indexmap 2.7.0", + "indexmap 2.7.1", "rstest", ] @@ -1981,22 +1980,11 @@ dependencies = [ "common-error", "daft-core", "daft-logical-plan", - "lazy_static", "pyo3", "snafu", "sqlparser", ] -[[package]] -name = "daft-catalog-python-catalog" -version = "0.3.0-dev0" -dependencies = [ - "daft-catalog", - "daft-logical-plan", - "pyo3", - "snafu", -] - [[package]] name = "daft-compression" version = "0.3.0-dev0" @@ -2038,7 +2026,7 @@ dependencies = [ "tokio", "tonic", "tracing", - "uuid 1.11.0", + "uuid 1.13.1", ] [[package]] @@ -2062,7 +2050,7 @@ dependencies = [ "bincode", "chrono", "chrono-tz", - "comfy-table 7.1.3", + "comfy-table 7.1.4", "common-arrow-ffi", "common-display", "common-error", @@ -2077,7 +2065,7 @@ dependencies = [ "fnv", "html-escape", "hyperloglog", - "indexmap 2.7.0", + "indexmap 2.7.1", "itertools 0.11.0", "lazy_static", "log", @@ -2149,7 +2137,7 @@ dependencies = [ "daft-core", "daft-sketch", "derive_more", - "indexmap 2.7.0", + "indexmap 2.7.1", "itertools 0.11.0", "pyo3", "serde", @@ -2179,7 +2167,7 @@ dependencies = [ "tiktoken-rs", "tokio", "typetag", - "uuid 1.11.0", + "uuid 1.13.1", "xxhash-rust", ] @@ -2293,7 +2281,7 @@ dependencies = [ "daft-io", "daft-recordbatch", "futures", - "indexmap 2.7.0", + "indexmap 2.7.1", "memchr", "memmap2", "num-traits", @@ -2337,7 +2325,7 @@ dependencies = [ "daft-scan", "daft-writers", "futures", - "indexmap 2.7.0", + "indexmap 2.7.1", "indicatif", "itertools 0.11.0", "kanal", @@ -2386,7 +2374,7 @@ dependencies = [ "daft-functions", "daft-schema", "derivative", - "indexmap 2.7.0", + "indexmap 2.7.1", "itertools 0.11.0", "log", "num-format", @@ -2399,7 +2387,7 @@ dependencies = [ "test-log", "tokio", "typed-builder 0.20.0", - "uuid 1.11.0", + "uuid 1.13.1", ] [[package]] @@ -2463,7 +2451,7 @@ dependencies = [ "daft-recordbatch", "daft-stats", "futures", - "indexmap 2.7.0", + "indexmap 2.7.1", "itertools 0.11.0", "log", "parquet2", @@ -2514,7 +2502,7 @@ name = "daft-recordbatch" version = "0.3.0-dev0" dependencies = [ "arrow2", - "comfy-table 7.1.3", + "comfy-table 7.1.4", "common-arrow-ffi", "common-display", "common-error", @@ -2525,7 +2513,7 @@ dependencies = [ "daft-logical-plan", "futures", "html-escape", - "indexmap 2.7.0", + "indexmap 2.7.1", "num-traits", "pyo3", "rand 0.8.5", @@ -2557,7 +2545,7 @@ dependencies = [ "daft-schema", "daft-stats", "futures", - "indexmap 2.7.0", + "indexmap 2.7.1", "itertools 0.11.0", "parquet2", "pyo3", @@ -2602,7 +2590,7 @@ dependencies = [ "common-version", "derive_more", "html-escape", - "indexmap 2.7.0", + "indexmap 2.7.1", "num-derive", "num-traits", "pyo3", @@ -2617,7 +2605,7 @@ dependencies = [ "daft-catalog", "daft-logical-plan", "pyo3", - "uuid 1.11.0", + "uuid 1.13.1", ] [[package]] @@ -2665,7 +2653,7 @@ dependencies = [ "daft-core", "daft-dsl", "daft-recordbatch", - "indexmap 2.7.0", + "indexmap 2.7.1", "serde", "snafu", ] @@ -2707,7 +2695,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] @@ -2718,7 +2706,7 @@ checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" dependencies = [ "darling_core", "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] @@ -2791,7 +2779,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] @@ -2801,7 +2789,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] @@ -2821,7 +2809,7 @@ checksum = "cb7330aeadfbe296029522e6c40f315320aba36fc43a5b3632f3795348f3bd22" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", "unicode-xid", ] @@ -2870,7 +2858,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] @@ -2881,9 +2869,9 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "dyn-clone" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125" +checksum = "feeef44e73baff3a26d371801df019877a9866a8c493d315ab00177843314f35" [[package]] name = "either" @@ -2939,9 +2927,9 @@ dependencies = [ [[package]] name = "equivalent" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "erased-serde" @@ -3169,7 +3157,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] @@ -3242,6 +3230,18 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "getrandom" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.13.3+wasi-0.2.2", + "windows-targets 0.52.6", +] + [[package]] name = "gif" version = "0.13.1" @@ -3309,7 +3309,7 @@ dependencies = [ "reqwest 0.12.12", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", "time", "tokio", "tracing", @@ -3318,12 +3318,12 @@ dependencies = [ [[package]] name = "google-cloud-metadata" -version = "0.5.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04f945a208886a13d07636f38fb978da371d0abc3e34bad338124b9f8c135a8f" +checksum = "d901aeb453fd80e51d64df4ee005014f6cf39f2d736dd64f7239c132d9d39a6a" dependencies = [ "reqwest 0.12.12", - "thiserror", + "thiserror 1.0.69", "tokio", ] @@ -3349,11 +3349,11 @@ dependencies = [ "regex", "reqwest 0.12.12", "reqwest-middleware", - "ring 0.17.8", + "ring 0.17.9", "serde", "serde_json", "sha2", - "thiserror", + "thiserror 1.0.69", "time", "tokio", "tracing", @@ -3381,7 +3381,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.7.0", + "indexmap 2.7.1", "slab", "tokio", "tokio-util", @@ -3400,7 +3400,7 @@ dependencies = [ "futures-core", "futures-sink", "http 1.2.0", - "indexmap 2.7.0", + "indexmap 2.7.1", "slab", "tokio", "tokio-util", @@ -3604,9 +3604,9 @@ dependencies = [ [[package]] name = "httparse" -version = "1.9.5" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d71d3574edd2771538b901e6549113b4006ece66150fb69c0fb6d9a2adae946" +checksum = "f2d708df4e7140240a16cd6ab0ab65c972d7433ab77819ea693fde9c43811e2a" [[package]] name = "httpdate" @@ -3649,9 +3649,9 @@ dependencies = [ [[package]] name = "hyper" -version = "1.5.2" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "256fb8d4bd6413123cc9d91832d78325c48ff41677595be797d90f42969beae0" +checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80" dependencies = [ "bytes", "futures-channel", @@ -3674,7 +3674,7 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" dependencies = [ - "hyper 1.5.2", + "hyper 1.6.0", "hyper-util", "pin-project-lite", "tokio", @@ -3702,7 +3702,7 @@ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" dependencies = [ "bytes", "http-body-util", - "hyper 1.5.2", + "hyper 1.6.0", "hyper-util", "native-tls", "tokio", @@ -3721,7 +3721,7 @@ dependencies = [ "futures-util", "http 1.2.0", "http-body 1.0.1", - "hyper 1.5.2", + "hyper 1.6.0", "pin-project-lite", "socket2", "tokio", @@ -3871,7 +3871,7 @@ checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] @@ -3921,9 +3921,9 @@ dependencies = [ [[package]] name = "image-webp" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e031e8e3d94711a9ccb5d6ea357439ef3dcbed361798bd4071dc4d9793fbe22f" +checksum = "b77d01e822461baa8409e156015a1d91735549f0f2c17691bd2d996bef238f7f" dependencies = [ "byteorder-lite", "quick-error 2.0.1", @@ -3941,9 +3941,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.7.0" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f" +checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652" dependencies = [ "equivalent", "hashbrown 0.15.2", @@ -3952,9 +3952,9 @@ dependencies = [ [[package]] name = "indicatif" -version = "0.17.9" +version = "0.17.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbf675b85ed934d3c67b5c5469701eec7db22689d0a2139d856e0925fa28b281" +checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" dependencies = [ "console", "number_prefix", @@ -3989,18 +3989,18 @@ dependencies = [ [[package]] name = "inventory" -version = "0.3.17" +version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b31349d02fe60f80bbbab1a9402364cad7460626d6030494b08ac4a2075bf81" +checksum = "54b12ebb6799019b044deaf431eadfe23245b259bba5a2c0796acec3943a3cdb" dependencies = [ "rustversion", ] [[package]] name = "ipnet" -version = "2.10.1" +version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708" +checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" [[package]] name = "is_ci" @@ -4034,9 +4034,9 @@ dependencies = [ [[package]] name = "itertools" -version = "0.13.0" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" dependencies = [ "either", ] @@ -4073,7 +4073,7 @@ dependencies = [ "ahash", "dyn-clone", "hifijson", - "indexmap 2.7.0", + "indexmap 2.7.1", "jaq-syn", "once_cell", "serde_json", @@ -4124,9 +4124,9 @@ checksum = "f5d4a7da358eff58addd2877a45865158f0d78c911d43a5784ceb7bbf52833b0" [[package]] name = "js-sys" -version = "0.3.76" +version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6717b6b5b077764fb5966237269cb3c64edddde4b14ce42647430a78ced9e7b7" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" dependencies = [ "once_cell", "wasm-bindgen", @@ -4143,14 +4143,14 @@ dependencies = [ [[package]] name = "jsonwebtoken" -version = "9.3.0" +version = "9.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9ae10193d25051e74945f1ea2d0b42e03cc3b890f7e4cc5faa44997d808193f" +checksum = "5a87cc7a48537badeae96744432de36f4be2b4a34a05a5ef32e9dd8a1c169dde" dependencies = [ - "base64 0.21.7", + "base64 0.22.1", "js-sys", "pem", - "ring 0.17.8", + "ring 0.17.9", "serde", "serde_json", "simple_asn1", @@ -4376,9 +4376,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.22" +version = "0.4.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" [[package]] name = "lz4" @@ -4502,9 +4502,9 @@ dependencies = [ [[package]] name = "miniz_oxide" -version = "0.8.2" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ffbe83022cedc1d264172192511ae958937694cd57ce297164951b8b3568394" +checksum = "b3b1c9bd4fe1f0f8b387f6eb9eb3b4a1aa26185e5750efb9140301703f62cd1b" dependencies = [ "adler2", "simd-adler32", @@ -4551,9 +4551,9 @@ checksum = "97af489e1e21b68de4c390ecca6703318bc1aa16e9733bcb62c089b73c6fbb1b" [[package]] name = "native-tls" -version = "0.2.12" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8614eb2c83d59d1c8cc974dd3f920198647674a0a035e1af1fa58707e317466" +checksum = "0dab59f8e050d5df8e4dd87d9206fb6f65a483e20ac9fda365ade4fab353196c" dependencies = [ "libc", "log", @@ -4739,7 +4739,7 @@ dependencies = [ "num-integer", "num-traits", "pyo3", - "rustc-hash 2.1.0", + "rustc-hash 2.1.1", ] [[package]] @@ -4757,7 +4757,7 @@ dependencies = [ "serde_json", "serde_path_to_error", "sha2", - "thiserror", + "thiserror 1.0.69", "url", ] @@ -4772,15 +4772,15 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.20.2" +version = "1.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" [[package]] name = "oneshot" -version = "0.1.8" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e296cf87e61c9cfc1a61c3c63a0f7f286ed4554e0e22be84e8a38e1d264a2a29" +checksum = "79d72a7c0f743d2ebb0a2ad1d219db75fdc799092ed3a884c9144c42a31225bd" [[package]] name = "oorandom" @@ -4790,11 +4790,11 @@ checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" [[package]] name = "openssl" -version = "0.10.68" +version = "0.10.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6174bc48f102d208783c2c84bf931bb75927a617866870de8a4ea85597f871f5" +checksum = "61cfb4e166a8bb8c9b55c500bc2308550148ece889be90f609377e58140f42c6" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.8.0", "cfg-if", "foreign-types", "libc", @@ -4811,29 +4811,29 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] name = "openssl-probe" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" +checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "openssl-src" -version = "300.4.1+3.4.0" +version = "300.4.2+3.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "faa4eac4138c62414b5622d1b31c5c304f34b406b013c079c2bbc652fdd6678c" +checksum = "168ce4e058f975fe43e89d9ccf78ca668601887ae736090aacc23ae353c298e2" dependencies = [ "cc", ] [[package]] name = "openssl-sys" -version = "0.9.104" +version = "0.9.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45abf306cbf99debc8195b66b7346498d7b10c210de50418b5ccd7ceba08c741" +checksum = "8b22d5b84be05a8d6947c7cb71f7c849aa0f112acd4bf51c2a7c1c988ac0a9dc" dependencies = [ "cc", "libc", @@ -4861,9 +4861,9 @@ checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" [[package]] name = "outref" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a" +checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" [[package]] name = "overload" @@ -4944,7 +4944,7 @@ dependencies = [ "criterion", "flate2", "futures", - "indexmap 2.7.0", + "indexmap 2.7.1", "lz4", "lz4_flex", "parquet-format-safe", @@ -4953,7 +4953,7 @@ dependencies = [ "serde", "snap 1.1.1", "streaming-decompression", - "thiserror", + "thiserror 1.0.69", "tokio", "xxhash-rust", "zstd 0.12.4", @@ -5045,22 +5045,22 @@ dependencies = [ [[package]] name = "pin-project" -version = "1.1.8" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e2ec53ad785f4d35dac0adea7f7dc6f1bb277ad84a680c7afefeae05d1f5916" +checksum = "dfe2e71e1471fe07709406bf725f710b02927c9c54b2b5b2ec0e8087d97c327d" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.8" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d56a66c0c55993aa927429d0f8a0abfd74f084e4d9c192cffed01e418d83eefb" +checksum = "f6e859e6e5bd50440ab63c47e3ebabc90f26251f7c73c3d3e837b74a1cc3fa67" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] @@ -5189,9 +5189,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.92" +version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" +checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" dependencies = [ "unicode-ident", ] @@ -5204,7 +5204,7 @@ checksum = "14cae93065090804185d3b75f0bf93b8eeda30c7a9b4a33d3bdb3988d6229e50" dependencies = [ "bit-set 0.8.0", "bit-vec 0.8.0", - "bitflags 2.6.0", + "bitflags 2.8.0", "lazy_static", "num-traits", "rand 0.8.5", @@ -5238,12 +5238,12 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.4" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c0fef6c4230e4ccf618a35c59d7ede15dea37de8427500f50aff708806e42ec" +checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" dependencies = [ "bytes", - "prost-derive 0.13.4", + "prost-derive 0.13.5", ] [[package]] @@ -5274,35 +5274,35 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.13.4" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "157c5a9d7ea5c2ed2d9fb8f495b64759f7816c7eaea54ba3978f0d63000162e3" +checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.13.0", + "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] name = "prost-types" -version = "0.13.4" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc2f1e56baa61e93533aebc21af4d2134b70f66275e0fcdf3cbe43d77ff7e8fc" +checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" dependencies = [ - "prost 0.13.4", + "prost 0.13.5", ] [[package]] name = "pyo3" -version = "0.23.3" +version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e484fd2c8b4cb67ab05a318f1fd6fa8f199fcc30819f08f07d200809dba26c15" +checksum = "57fe09249128b3173d092de9523eaa75136bf7ba85e0d69eca241c7939c933cc" dependencies = [ "cfg-if", "chrono", - "indexmap 2.7.0", + "indexmap 2.7.1", "indoc", "inventory", "libc", @@ -5317,9 +5317,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.23.3" +version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc0e0469a84f208e20044b98965e1561028180219e35352a2afaf2b942beff3b" +checksum = "1cd3927b5a78757a0d71aa9dff669f903b1eb64b54142a9bd9f757f8fde65fd7" dependencies = [ "once_cell", "target-lexicon", @@ -5327,9 +5327,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.23.3" +version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb1547a7f9966f6f1a0f0227564a9945fe36b90da5a93b3933fc3dc03fae372d" +checksum = "dab6bb2102bd8f991e7749f130a70d05dd557613e39ed2deeee8e9ca0c4d548d" dependencies = [ "libc", "pyo3-build-config", @@ -5348,27 +5348,27 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.23.3" +version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb6da8ec6fa5cedd1626c886fc8749bdcbb09424a86461eb8cdf096b7c33257" +checksum = "91871864b353fd5ffcb3f91f2f703a22a9797c91b9ab497b1acac7b07ae509c7" dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] name = "pyo3-macros-backend" -version = "0.23.3" +version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38a385202ff5a92791168b1136afae5059d3ac118457bb7bc304c197c2d33e7d" +checksum = "43abc3b80bc20f3facd86cd3c60beed58c3e2aa26213f3cda368de39c60a27e4" dependencies = [ "heck 0.5.0", "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] @@ -5544,7 +5544,7 @@ version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.8.0", ] [[package]] @@ -5564,7 +5564,7 @@ checksum = "bcc303e793d3734489387d205e9b186fac9c6cfacedd98cbb2e8a5943595f3e6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] @@ -5673,7 +5673,7 @@ dependencies = [ "http 1.2.0", "http-body 1.0.1", "http-body-util", - "hyper 1.5.2", + "hyper 1.6.0", "hyper-tls 0.6.0", "hyper-util", "ipnet", @@ -5714,7 +5714,7 @@ dependencies = [ "http 1.2.0", "reqwest 0.12.12", "serde", - "thiserror", + "thiserror 1.0.69", "tower-service", ] @@ -5729,7 +5729,7 @@ dependencies = [ "futures", "getrandom 0.2.15", "http 1.2.0", - "hyper 1.5.2", + "hyper 1.6.0", "parking_lot 0.11.2", "reqwest 0.12.12", "reqwest-middleware", @@ -5757,7 +5757,7 @@ dependencies = [ "cc", "libc", "once_cell", - "spin 0.5.2", + "spin", "untrusted 0.7.1", "web-sys", "winapi", @@ -5765,15 +5765,14 @@ dependencies = [ [[package]] name = "ring" -version = "0.17.8" +version = "0.17.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" +checksum = "e75ec5e92c4d8aede845126adc388046234541629e76029599ed35a003c7ed24" dependencies = [ "cc", "cfg-if", "getrandom 0.2.15", "libc", - "spin 0.9.8", "untrusted 0.9.0", "windows-sys 0.52.0", ] @@ -5809,7 +5808,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.95", + "syn 2.0.98", "unicode-ident", ] @@ -5827,9 +5826,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustc-hash" -version = "2.1.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7fb8039b3032c191086b10f11f319a6e99e1e82889c5cc6046f515c9db1d497" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" [[package]] name = "rustc_version" @@ -5842,11 +5841,11 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.43" +version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a78891ee6bf2340288408954ac787aa063d8e8817e9f53abb37c695c6d834ef6" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.8.0", "errno", "libc", "linux-raw-sys", @@ -5873,9 +5872,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.10.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2bf47e6ff922db3825eb750c4e2ff784c6ff8fb9e13046ef6a1d1c5401b0b37" +checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c" [[package]] name = "rustversion" @@ -5897,9 +5896,9 @@ dependencies = [ [[package]] name = "ryu" -version = "1.0.18" +version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd" [[package]] name = "same-file" @@ -6001,7 +6000,7 @@ checksum = "1db149f81d46d2deba7cd3c50772474707729550221e69588478ebf9ada425ae" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] @@ -6019,7 +6018,7 @@ version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.8.0", "core-foundation", "core-foundation-sys", "libc", @@ -6038,9 +6037,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.24" +version = "1.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cb6eb87a131f756572d7fb904f6e7b68633f09cca868c5df1c4b8d1a694bbba" +checksum = "f79dfe2d285b0488816f30e700a7438c5a73d816b5b7d3ac72fbc48b0d185e03" [[package]] name = "seq-macro" @@ -6078,16 +6077,16 @@ checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] name = "serde_json" -version = "1.0.135" +version = "1.0.138" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b0d7ba2887406110130a978386c4e1befb98c674b4fba677954e4db976630d9" +checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949" dependencies = [ - "indexmap 2.7.0", + "indexmap 2.7.1", "itoa", "memchr", "ryu", @@ -6112,7 +6111,7 @@ checksum = "c7715380eec75f029a4ef7de39a9200e0a63823176b759d055b613f5a87df6a6" dependencies = [ "percent-encoding", "serde", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -6216,13 +6215,13 @@ checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" [[package]] name = "simple_asn1" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adc4e5204eb1910f40f9cfa375f6f05b68c3abac4b6fd879c8ff5e7ae8a0a085" +checksum = "297f631f50729c8c99b84667867963997ec0b50f32b2a7dbcab828ef0541e8bb" dependencies = [ "num-bigint 0.4.6", "num-traits", - "thiserror", + "thiserror 2.0.11", "time", ] @@ -6316,7 +6315,7 @@ dependencies = [ name = "spark-connect" version = "0.3.0-dev0" dependencies = [ - "prost 0.13.4", + "prost 0.13.5", "prost-types", "tonic", ] @@ -6327,12 +6326,6 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" -[[package]] -name = "spin" -version = "0.9.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" - [[package]] name = "spki" version = "0.7.3" @@ -6447,7 +6440,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] @@ -6478,9 +6471,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.95" +version = "2.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46f71c0377baf4ef1cc3e3402ded576dccc315800fbc62dfc7fe04b009773b4a" +checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1" dependencies = [ "proc-macro2", "quote", @@ -6522,7 +6515,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] @@ -6568,7 +6561,7 @@ checksum = "257822358c6f206fed78bfe6369cf959063b0644d70f88df6b19f2dadc93423e" dependencies = [ "alloca", "anyhow", - "clap 4.5.24", + "clap 4.5.29", "colorz", "glob-match", "goblin", @@ -6578,7 +6571,7 @@ dependencies = [ "rand 0.8.5", "scroll", "tempfile", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -6595,13 +6588,13 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" [[package]] name = "tempfile" -version = "3.15.0" +version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a8a559c81686f576e8cd0290cd2a24a2a9ad80c98b3478856500fcbd7acd704" +checksum = "38c246215d7d24f48ae091a2902398798e05d978b24315d6efbc00ede9a8bb91" dependencies = [ "cfg-if", "fastrand 2.3.0", - "getrandom 0.2.15", + "getrandom 0.3.1", "once_cell", "rustix", "windows-sys 0.59.0", @@ -6619,9 +6612,9 @@ dependencies = [ [[package]] name = "test-log" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dffced63c2b5c7be278154d76b479f9f9920ed34e7574201407f0b14e2bbb93" +checksum = "e7f46083d221181166e5b6f6b1e5f1d499f3a76888826e6cb1d057554157cd0f" dependencies = [ "env_logger 0.11.6", "test-log-macros", @@ -6630,13 +6623,13 @@ dependencies = [ [[package]] name = "test-log-macros" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5999e24eaa32083191ba4e425deb75cdf25efefabe5aaccb7446dd0d4122a3f5" +checksum = "888d0c3c6db53c0fdab160d2ed5e12ba745383d3e85813f2ea0f2b1475ab553f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] @@ -6656,7 +6649,16 @@ version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc" +dependencies = [ + "thiserror-impl 2.0.11", ] [[package]] @@ -6667,7 +6669,18 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", ] [[package]] @@ -6830,7 +6843,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] @@ -6883,12 +6896,12 @@ dependencies = [ "http 1.2.0", "http-body 1.0.1", "http-body-util", - "hyper 1.5.2", + "hyper 1.6.0", "hyper-timeout", "hyper-util", "percent-encoding", "pin-project", - "prost 0.13.4", + "prost 0.13.5", "socket2", "tokio", "tokio-stream", @@ -6965,7 +6978,7 @@ checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] @@ -7062,7 +7075,7 @@ checksum = "560b82d656506509d43abe30e0ba64c56b1953ab3d4fe7ba5902747a7a3cedd5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] @@ -7098,7 +7111,7 @@ checksum = "d9d30226ac9cbd2d1ff775f74e8febdab985dab14fb14aa2582c29a92d5555dc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] @@ -7124,9 +7137,9 @@ checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" [[package]] name = "unicode-ident" -version = "1.0.14" +version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" +checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034" [[package]] name = "unicode-linebreak" @@ -7239,19 +7252,19 @@ dependencies = [ [[package]] name = "uuid" -version = "1.11.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" +checksum = "ced87ca4be083373936a67f8de945faa23b6b42384bd5b64434850802c6dccd0" dependencies = [ - "getrandom 0.2.15", + "getrandom 0.3.1", "serde", ] [[package]] name = "valuable" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" [[package]] name = "value-trait" @@ -7285,9 +7298,9 @@ checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" [[package]] name = "wait-timeout" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6" +checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11" dependencies = [ "libc", ] @@ -7329,36 +7342,46 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasi" +version = "0.13.3+wasi-0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" +dependencies = [ + "wit-bindgen-rt", +] + [[package]] name = "wasm-bindgen" -version = "0.2.99" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a474f6281d1d70c17ae7aa6a613c87fce69a127e2624002df63dcb39d6cf6396" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" dependencies = [ "cfg-if", "once_cell", + "rustversion", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.99" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f89bb38646b4f81674e8f5c3fb81b562be1fd936d84320f3264486418519c79" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" dependencies = [ "bumpalo", "log", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.49" +version = "0.4.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38176d9b44ea84e9184eff0bc34cc167ed044f816accfe5922e54d84cf48eca2" +checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" dependencies = [ "cfg-if", "js-sys", @@ -7369,9 +7392,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.99" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cc6181fd9a7492eef6fef1f33961e3695e4579b9872a6f7c83aee556666d4fe" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -7379,22 +7402,25 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.99" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.99" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] [[package]] name = "wasm-streams" @@ -7426,9 +7452,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.76" +version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04dd7223427d52553d3702c004d3b2fe07c148165faa56313cb00211e31c12bc" +checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" dependencies = [ "js-sys", "wasm-bindgen", @@ -7520,7 +7546,7 @@ checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] @@ -7531,7 +7557,7 @@ checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] @@ -7731,6 +7757,15 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "wit-bindgen-rt" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" +dependencies = [ + "bitflags 2.8.0", +] + [[package]] name = "write16" version = "1.0.0" @@ -7790,7 +7825,7 @@ checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", "synstructure 0.13.1", ] @@ -7833,7 +7868,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] @@ -7853,7 +7888,7 @@ checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", "synstructure 0.13.1", ] @@ -7882,7 +7917,7 @@ checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.98", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index e9bd309299..7a61b1e007 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,6 @@ common-tracing = {path = "src/common/tracing", default-features = false} common-version = {path = "src/common/version", default-features = false} daft-algebra = {path = "src/daft-algebra", default-features = false} daft-catalog = {path = "src/daft-catalog", default-features = false} -daft-catalog-python-catalog = {path = "src/daft-catalog/python-catalog", optional = true} daft-compression = {path = "src/daft-compression", default-features = false} daft-connect = {path = "src/daft-connect", optional = true} daft-context = {path = "src/daft-context", default-features = false} @@ -50,6 +49,8 @@ sysinfo = {workspace = true} [features] # maturin will turn this on python = [ + "dep:pyo3", + "dep:pyo3-log", "common-daft-config/python", "common-display/python", "common-partitioning/python", @@ -57,8 +58,8 @@ python = [ "common-file-formats/python", "common-scan-info/python", "common-system-info/python", - "daft-catalog-python-catalog/python", "daft-catalog/python", + "daft-connect/python", "daft-context/python", "daft-core/python", "daft-csv/python", @@ -74,18 +75,13 @@ python = [ "daft-micropartition/python", "daft-parquet/python", "daft-physical-plan/python", + "daft-recordbatch/python", "daft-scan/python", "daft-scheduler/python", "daft-sql/python", "daft-session/python", "daft-stats/python", - "daft-recordbatch/python", "daft-writers/python", - "dep:daft-catalog-python-catalog", - "dep:daft-connect", - "daft-connect/python", - "dep:pyo3", - "dep:pyo3-log" ] [lib] diff --git a/daft/__init__.py b/daft/__init__.py index 60cec1ca7f..dd1b3e5218 100644 --- a/daft/__init__.py +++ b/daft/__init__.py @@ -59,11 +59,10 @@ def refresh_logger() -> None: ### from daft.catalog import ( + Catalog, Identifier, - read_table, - register_table, + Table, ) -from daft.context import set_execution_config, set_planning_config, execution_config_ctx, planning_config_ctx from daft.convert import ( from_arrow, from_dask_dataframe, @@ -91,7 +90,15 @@ def refresh_logger() -> None: read_lance, ) from daft.series import Series -from daft.session import Session, current_session, set_session +from daft.session import ( + Session, + create_catalog, + create_temp_table, + current_catalog, + current_session, + set_session, + set_catalog, +) from daft.sql import sql, sql_expr from daft.udf import udf from daft.viz import register_viz_hook @@ -99,8 +106,11 @@ def refresh_logger() -> None: to_struct = Expression.to_struct __all__ = [ + "Catalog", + # !! – move to daft-table "DataCatalogTable", "DataCatalogType", + # !! -------------------- "DataFrame", "DataType", "Expression", @@ -111,9 +121,13 @@ def refresh_logger() -> None: "Schema", "Series", "Session", + "Table", "TimeUnit", "coalesce", "col", + "create_catalog", + "create_temp_table", + "current_catalog", "current_session", "execution_config_ctx", "from_arrow", @@ -135,13 +149,12 @@ def refresh_logger() -> None: "read_lance", "read_parquet", "read_sql", - "read_table", "refresh_logger", - "register_table", "register_viz_hook", "set_execution_config", "set_planning_config", "set_session", + "set_catalog", "sql", "sql_expr", "struct", diff --git a/daft/catalog/__init__.py b/daft/catalog/__init__.py index dd1d923137..860049f2c4 100644 --- a/daft/catalog/__init__.py +++ b/daft/catalog/__init__.py @@ -1,163 +1,94 @@ -"""The `daft.catalog` module contains functionality for Data Catalogs. - -A Data Catalog can be understood as a system/service for users to discover, access and query their data. -Most commonly, users' data is represented as a "table". Some more modern Data Catalogs such as Unity Catalog -also expose other types of data including files, ML models, registered functions and more. - -Examples of Data Catalogs include AWS Glue, Hive Metastore, Apache Iceberg REST and Unity Catalog. - -Daft manages Data Catalogs by registering them in an internal meta-catalog, called the "DaftMetaCatalog". This -is simple a collection of data catalogs, which Daft will attempt to detect from a users' current environment. - -**Data Catalog** - -Daft recognizes a default catalog which it will attempt to use when no specific catalog name is provided. - -```python -# This will hit the default catalog -daft.read_table("my_db.my_namespace.my_table") -``` - -**Named Tables** - -Daft allows also the registration of named tables, which have no catalog associated with them. - -Note that named tables take precedence over the default catalog's table names when resolving names. - -```python -df = daft.from_pydict({"foo": [1, 2, 3]}) - -daft.catalog.register_table( - "my_table", - df, -) - -# Your table is now accessible from Daft-SQL, or Daft's `read_table` -df1 = daft.read_table("my_table") -df2 = daft.sql("SELECT * FROM my_table") -``` -""" +"""The daft-catalog moduel documentation...""" from __future__ import annotations - +from abc import ABC, abstractmethod +from typing import Sequence from collections.abc import Sequence -from daft.daft import catalog as native_catalog -from daft.logical.builder import LogicalPlanBuilder - +from daft.daft import PyIdentifier, PyTableSource from daft.dataframe import DataFrame +from daft.logical.schema import Schema -from typing import TYPE_CHECKING -if TYPE_CHECKING: - from pyiceberg.catalog import Catalog as PyIcebergCatalog - from daft.unity_catalog import UnityCatalog +class Catalog(ABC): + """Catalog documentation...""" + @staticmethod + def empty() -> Catalog: + """Returns a new in-memory catalog implementation.""" + from daft.catalog.__memory import MemoryCatalog -__all__ = [ - "Identifier", - "read_table", - "register_python_catalog", - "register_table", - "unregister_catalog", -] - -# Forward imports from the native catalog which don't require Python wrappers -unregister_catalog = native_catalog.unregister_catalog - - -def read_table(name: str) -> DataFrame: - """Finds a table with the specified name and reads it as a DataFrame. - - The provided name can be any of the following, and Daft will return them with the following order of priority: - - 1. Name of a registered dataframe/SQL view (manually registered using `daft.register_table`): `"my_registered_table"` - 2. Name of a table within the default catalog (without inputting the catalog name) for example: `"my.table.name"` - 3. Name of a fully-qualified table path with the catalog name for example: `"my_catalog.my.table.name"` - - Args: - name: The identifier for the table to read - - Returns: - A DataFrame containing the data from the specified table. - """ - native_logical_plan_builder = native_catalog.read_table(name) - return DataFrame(LogicalPlanBuilder(native_logical_plan_builder)) - - -def register_table(name: str, dataframe: DataFrame) -> str: - """Register a DataFrame as a named table. - - This function registers a DataFrame as a named table, making it accessible - via Daft-SQL or Daft's `read_table` function. - - Args: - name (str): The name to register the table under. - dataframe (daft.DataFrame): The DataFrame to register as a table. + return MemoryCatalog({}) - Returns: - str: The name of the registered table. + @staticmethod + def from_pydict(tables: dict[str, Table]) -> Catalog: + """Returns a new in-memory catalog implementation with temporary tables.""" + from daft.catalog.__memory import MemoryCatalog - Example: - >>> df = daft.from_pydict({"foo": [1, 2, 3]}) - >>> daft.catalog.register_table("my_table", df) - >>> daft.read_table("my_table") - """ - return native_catalog.register_table(name, dataframe._builder._builder) + return MemoryCatalog(tables) + # TODO UPDATE + # def from_opts(name: str, options: object | None = None) -> Catalog: + # """Loads a new catalog from the configuration options or creates an in-memory catalog if none given.""" + # if options is None: + # return Catalog._from_none(name) + # else: + # return Catalog._from_some(name, options) -def register_python_catalog(catalog: PyIcebergCatalog | UnityCatalog, name: str | None = None) -> str: - """Registers a Python catalog with Daft. + # @property + # @abstractmethod + # def inner(self) -> Catalog | None: + # """Returns the inner catalog object for direct access if neccessary.""" - Currently supports: + ### + # create_* + ### - * [PyIceberg Catalogs](https://py.iceberg.apache.org/api/) - * [Unity Catalog](https://www.getdaft.io/projects/docs/en/latest/user_guide/integrations/unity-catalog.html) + # @abstractmethod + # def create_namespace(self, name: str) -> Namespace: + # """Creates a namespace scoped to this catalog.""" - Args: - catalog (PyIcebergCatalog | UnityCatalog): The Python catalog to register. - name (str | None, optional): The name to register the catalog under. If None, this catalog is registered as the default catalog. + @abstractmethod + def create_table(self, name: str, source: TableSource | None = None) -> Table: + """Creates a table scoped to this catalog.""" - Returns: - str: The name of the registered catalog. + ### + # has_* + ### - Raises: - ValueError: If an unsupported catalog type is provided. + # @abstractmethod + # def has_namespace(self, name: str) -> bool: + # """Returns true iff this catalog has a namespace with the given name.""" - Example: - >>> from pyiceberg.catalog import load_catalog - >>> catalog = load_catalog("my_catalog") - >>> daft.catalog.register_python_catalog(catalog, "my_daft_catalog") + ### + # get_* + ### - """ - _PYICEBERG_AVAILABLE = False - try: - from pyiceberg.catalog import Catalog as PyIcebergCatalog + # @abstractmethod + # def get_namespace(self, name: str | None = None) -> Namespace: + # """Returns the given namespace if it exists, otherwise raises an exception.""" - _PYICEBERG_AVAILABLE = True - except ImportError: - pass + # @abstractmethod + # def get_table(self, name: str) -> Table: + # """Returns the given table if it exists, otherwise raises an exception.""" - _UNITY_AVAILABLE = False - try: - from daft.unity_catalog import UnityCatalog + ### + # list_* + ### - _UNITY_AVAILABLE = True - except ImportError: - pass + # @abstractmethod + # def list_namespaces(self, pattern: str | None = None) -> list[Namespace]: + # """Lists all namespaces matching the optional pattern.""" - python_catalog: PyIcebergCatalog - if _PYICEBERG_AVAILABLE and isinstance(catalog, PyIcebergCatalog): - from daft.catalog.pyiceberg import PyIcebergCatalogAdaptor + # @abstractmethod + # def list_tables(self, pattern: str | None = None) -> list[Table]: + # """Lists all tables matching the optional pattern.""" - python_catalog = PyIcebergCatalogAdaptor(catalog) - elif _UNITY_AVAILABLE and isinstance(catalog, UnityCatalog): - from daft.catalog.unity import UnityCatalogAdaptor + ### + # read_* + ### - python_catalog = UnityCatalogAdaptor(catalog) - else: - raise ValueError(f"Unsupported catalog type: {type(catalog)}") - - return native_catalog.register_python_catalog(python_catalog, name) + # def read_table(self, name: Identifier) -> DataFrame: + # raise NotImplementedError("read_table not implemented") class Identifier(Sequence): @@ -168,28 +99,27 @@ class Identifier(Sequence): >>> assert len(id) == 2 """ - _identifier: native_catalog.PyIdentifier + _identifier: PyIdentifier def __init__(self, *parts: str): """Creates an Identifier from its parts. Example: - >>> Identifier("schema", "table") - >>> # + >>> Identifier("namespace", "table") Returns: Identifier: A new identifier. """ if len(parts) < 1: raise ValueError("Identifier requires at least one part.") - self._identifier = native_catalog.PyIdentifier(parts[:-1], parts[-1]) + self._identifier = PyIdentifier(parts[:-1], parts[-1]) @staticmethod def from_sql(input: str, normalize: bool = False) -> Identifier: """Parses an Identifier from an SQL string, normalizing to lowercase if specified. Example: - >>> Identifier.from_sql("schema.table") == Identifier("schema", "table") + >>> Identifier.from_sql("namespace.table") == Identifier("namespace", "table") >>> Identifier.from_sql('"a.b"') == Identifier('"a.b."') >>> Identifier.from_sql('ABC."xYz"', normalize=True) == Identifier("abc", "xYz") @@ -197,7 +127,7 @@ def from_sql(input: str, normalize: bool = False) -> Identifier: Identifier: A new identifier. """ i = Identifier.__new__(Identifier) - i._identifier = native_catalog.PyIdentifier.from_sql(input, normalize) + i._identifier = PyIdentifier.from_sql(input, normalize) return i def __eq__(self, other: object) -> bool: @@ -216,3 +146,103 @@ def __len__(self) -> int: def __repr__(self) -> str: return f"Identifier('{self._identifier.__repr__()}')" + + +# TODO make a sequence +Namespace = tuple[str] + + +class TableSource: + _source: PyTableSource + + def __init__(self) -> None: + raise ValueError("We do not support creating a TableSource via __init__") + + @staticmethod + def _from_object(source: object = None) -> TableSource: + # TODO for future sources, consider https://github.com/Eventual-Inc/Daft/pull/2864 + if source is None: + return TableSource._from_none() + elif isinstance(source, DataFrame): + return TableSource._from_df(source) + elif isinstance(source, str): + return TableSource._from_path(source) + elif isinstance(source, Schema): + return TableSource._from_schema(source) + else: + raise Exception(f"Unknown table source: {source}") + + @staticmethod + def _from_none() -> TableSource: + # for creating temp mutable tables, but we don't have those yet + # s = TableSource.__new__(TableSource) + # s._source = PyTableSource.empty() + # return s + # todo temp workaround just use an empty schema + return TableSource._from_schema(Schema._from_fields([])) + + @staticmethod + def _from_schema(schema: Schema) -> TableSource: + # we don't have mutable temp tables, so just make an empty view + # s = TableSource.__new__(TableSource) + # s._source = PyTableSource.from_schema(schema._schema) + # return s + # todo temp workaround until create_table is wired + return TableSource._from_df(DataFrame._from_pylist([])) + + @staticmethod + def _from_df(df: DataFrame) -> TableSource: + s = TableSource.__new__(TableSource) + s._source = PyTableSource.from_view(df._builder._builder) + return s + + @staticmethod + def _from_path(path: str) -> TableSource: + # for supporting daft.create_table("t", "/path/to/data") <-> CREATE TABLE t AS '/path/to/my.data' + raise NotImplementedError("creating a table source from a path is not yet supported.") + + +class Table(ABC): + """Table documentation...""" + + def __repr__(self) -> str: + return f"Table('{self.name()}')" + + @abstractmethod + def name(self) -> str: + """Returns the table name.""" + + @abstractmethod + def schema(self) -> Schema: + """Returns the table schema.""" + + ### + # Creation Methods + ### + + @staticmethod + def _from_source(name: str, source: TableSource | None = None) -> Table: + from daft.catalog.__memory import MemoryTable + + return MemoryTable._from_source(name, source) + + ### + # DataFrame Methods + ### + + @abstractmethod + def read(self) -> DataFrame: + """Returns a DataFrame from this table.""" + + @abstractmethod + def show(self, n: int = 8) -> None: + """Shows the first n rows from this table.""" + + +__all__ = [ + "Catalog", + "Identifier", + "Namespace", + "Table", + "TableSource", +] diff --git a/daft/catalog/__memory.py b/daft/catalog/__memory.py new file mode 100644 index 0000000000..02d81b0d46 --- /dev/null +++ b/daft/catalog/__memory.py @@ -0,0 +1,54 @@ +"""An in-memory implementation for the daft catalog abstractions.""" + +from __future__ import annotations + +from daft.catalog import Catalog, Table, TableSource +from daft.dataframe.dataframe import DataFrame +from daft.logical.schema import Schema + + +class MemoryCatalog(Catalog): + """An in-memory catalog scoped to a given session.""" + + _tables: dict[str, Table] + + def __init__(self, tables: dict[str, Table]): + self._tables = tables + + def __repr__(self) -> str: + return f"MemoryCatalog('{self._name}')" + + ### + # create_* + ### + + def create_table(self, name: str, source: TableSource = None) -> Table: + raise NotImplementedError() + + +class MemoryTable(Table): + """An in-memory table holds a reference to an existing dataframe.""" + + _inner: DataFrame + + def __init__(self, inner: DataFrame) -> Table: + self._inner = inner + + def name(self) -> str: + return self._name + + def schema(self) -> Schema: + return self._inner.schema() + + def __repr__(self) -> str: + return f"MemoryTable('{self._name}')" + + ### + # DataFrame Methods + ### + + def read(self) -> DataFrame: + return self._inner + + def show(self, n: int = 8) -> None: + return self._inner.show(n) diff --git a/daft/catalog/pyiceberg.py b/daft/catalog/pyiceberg.py deleted file mode 100644 index bde293a488..0000000000 --- a/daft/catalog/pyiceberg.py +++ /dev/null @@ -1,32 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from pyiceberg.catalog import Catalog as PyIcebergCatalog - from pyiceberg.table import Table as PyIcebergTable - - from daft.dataframe import DataFrame - -from daft.catalog.python_catalog import PythonCatalog, PythonCatalogTable - - -class PyIcebergCatalogAdaptor(PythonCatalog): - def __init__(self, pyiceberg_catalog: PyIcebergCatalog): - self._catalog = pyiceberg_catalog - - def list_tables(self, prefix: str) -> list[str]: - return [".".join(tup) for tup in self._catalog.list_tables(prefix)] - - def load_table(self, name: str) -> PyIcebergTableAdaptor: - return PyIcebergTableAdaptor(self._catalog.load_table(name)) - - -class PyIcebergTableAdaptor(PythonCatalogTable): - def __init__(self, pyiceberg_table: PyIcebergTable): - self._table = pyiceberg_table - - def to_dataframe(self) -> DataFrame: - import daft - - return daft.read_iceberg(self._table) diff --git a/daft/catalog/python_catalog.py b/daft/catalog/python_catalog.py deleted file mode 100644 index 2a0f942eac..0000000000 --- a/daft/catalog/python_catalog.py +++ /dev/null @@ -1,24 +0,0 @@ -from __future__ import annotations - -from abc import abstractmethod -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from daft.dataframe import DataFrame - - -class PythonCatalog: - """Wrapper class for various Python implementations of Data Catalogs.""" - - @abstractmethod - def list_tables(self, prefix: str) -> list[str]: ... - - @abstractmethod - def load_table(self, name: str) -> PythonCatalogTable: ... - - -class PythonCatalogTable: - """Wrapper class for various Python implementations of Data Catalog Tables.""" - - @abstractmethod - def to_dataframe(self) -> DataFrame: ... diff --git a/daft/catalog/unity.py b/daft/catalog/unity.py deleted file mode 100644 index aed0b0233e..0000000000 --- a/daft/catalog/unity.py +++ /dev/null @@ -1,49 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from daft.dataframe import DataFrame - from daft.unity_catalog import UnityCatalog, UnityCatalogTable - -from daft.catalog.python_catalog import PythonCatalog, PythonCatalogTable - - -class UnityCatalogAdaptor(PythonCatalog): - def __init__(self, unity_catalog: UnityCatalog): - self._catalog = unity_catalog - - def list_tables(self, prefix: str) -> list[str]: - num_namespaces = prefix.count(".") - if prefix == "": - return [ - tbl - for cat in self._catalog.list_catalogs() - for schema in self._catalog.list_schemas(cat) - for tbl in self._catalog.list_tables(schema) - ] - elif num_namespaces == 0: - catalog_name = prefix - return [ - tbl for schema in self._catalog.list_schemas(catalog_name) for tbl in self._catalog.list_tables(schema) - ] - elif num_namespaces == 1: - schema_name = prefix - return [tbl for tbl in self._catalog.list_tables(schema_name)] - else: - raise ValueError( - f"Unrecognized catalog name or schema name, expected a '.'-separated namespace but received: {prefix}" - ) - - def load_table(self, name: str) -> UnityTableAdaptor: - return UnityTableAdaptor(self._catalog.load_table(name)) - - -class UnityTableAdaptor(PythonCatalogTable): - def __init__(self, unity_table: UnityCatalogTable): - self._table = unity_table - - def to_dataframe(self) -> DataFrame: - import daft - - return daft.read_deltalake(self._table) diff --git a/daft/context.py b/daft/context.py index 33fb502530..f7ccfd06f0 100644 --- a/daft/context.py +++ b/daft/context.py @@ -130,7 +130,9 @@ def set_planning_config( # Replace values in the DaftPlanningConfig with user-specified overrides ctx = get_context() with ctx._lock: - old_daft_planning_config = ctx._ctx._daft_planning_config if config is None else config + old_daft_planning_config = ( + ctx._ctx._daft_planning_config if config is None else config + ) new_daft_planning_config = old_daft_planning_config.with_config_values( default_io_config=default_io_config, ) @@ -226,7 +228,9 @@ def set_execution_config( # Replace values in the DaftExecutionConfig with user-specified overrides ctx = get_context() with ctx._lock: - old_daft_execution_config = ctx._ctx._daft_execution_config if config is None else config + old_daft_execution_config = ( + ctx._ctx._daft_execution_config if config is None else config + ) new_daft_execution_config = old_daft_execution_config.with_config_values( scan_tasks_min_size_bytes=scan_tasks_min_size_bytes, diff --git a/daft/daft/__init__.pyi b/daft/daft/__init__.pyi index e8b546d5ea..8de44937c0 100644 --- a/daft/daft/__init__.pyi +++ b/daft/daft/__init__.pyi @@ -1221,12 +1221,6 @@ def binary_length(expr: PyExpr) -> PyExpr: ... def binary_concat(left: PyExpr, right: PyExpr) -> PyExpr: ... def binary_slice(expr: PyExpr, start: PyExpr, length: PyExpr | None = None) -> PyExpr: ... -class PyCatalog: - @staticmethod - def new() -> PyCatalog: ... - def register_table(self, name: str, logical_plan_builder: LogicalPlanBuilder) -> None: ... - def copy_from(self, other: PyCatalog) -> None: ... - class PySeries: @staticmethod def from_arrow(name: str, pyarrow_array: pa.Array) -> PySeries: ... @@ -1851,6 +1845,36 @@ class SystemInfo: def total_memory(self) -> int: ... def cpu_count(self) -> int | None: ... +### +# daft-catalog +### + +class PyCatalog: + def __init__(self): ... + def create_table(self, name: str, source: PyTableSource | None): ... + +class PyIdentifier: + def __init__(self, namespace: tuple[str, ...], name: str): ... + @staticmethod + def from_sql(input: str, normalize: bool): ... + def eq(self, other: PyIdentifier) -> bool: ... + def getitem(self, index: int) -> str: ... + def __len__(self) -> int: ... + def __repr__(self) -> str: ... + +class PyNamespace: + def __init__(self): ... + +class PyTable: + def __init__(self): ... + +class PyTableSource: + def __init__(self): ... + @staticmethod + def from_schema(schema: PySchema) -> PyTableSource: ... + @staticmethod + def from_view(view: LogicalPlanBuilder) -> PyTableSource: ... + ### # daft-session ### @@ -1859,3 +1883,19 @@ class PySession: def __init__(self): ... @staticmethod def empty() -> PySession: ... + def current_catalog(self) -> PyCatalog: ... + def current_namespace(self) -> PyNamespace: ... + def attach(self, name: str, catalog: PyCatalog): ... + def detach(self, name: str): ... + def create_catalog(self, name: str) -> PyCatalog: ... + def create_namespace(self, name: str) -> PyNamespace: ... + def create_table(self, name: str, source: PyTableSource): ... + def create_temp_table(self, name: str, source: PyTableSource): ... + def get_catalog(self, name: str) -> PyCatalog: ... + def get_namespace(self, name: str) -> PyNamespace: ... + def get_table(self, name: PyIdentifier) -> PyTable: ... + def list_catalogs(self, pattern: None | str = None) -> list[PyCatalog]: ... + def list_namespaces(self, pattern: None | str = None) -> list[PyNamespace]: ... + def list_tables(self, pattern: None | str = None) -> list[PyTable]: ... + def set_catalog(self, name: str): ... + def set_namespace(self, name: str): ... diff --git a/daft/daft/catalog.pyi b/daft/daft/catalog.pyi deleted file mode 100644 index bb8457afa0..0000000000 --- a/daft/daft/catalog.pyi +++ /dev/null @@ -1,20 +0,0 @@ -from typing import TYPE_CHECKING - -from daft.daft import LogicalPlanBuilder as PyLogicalPlanBuilder - -if TYPE_CHECKING: - from daft.catalog.python_catalog import PythonCatalog - -class PyIdentifier: - def __init__(self, namespace: tuple[str, ...], name: str): ... - @staticmethod - def from_sql(input: str, normalize: bool): ... - def eq(self, other: PyIdentifier) -> bool: ... - def getitem(self, index: int) -> str: ... - def __len__(self) -> int: ... - def __repr__(self) -> str: ... - -def read_table(name: str) -> PyLogicalPlanBuilder: ... -def register_table(name: str, plan_builder: PyLogicalPlanBuilder) -> str: ... -def register_python_catalog(catalog: PythonCatalog, catalog_name: str | None) -> str: ... -def unregister_catalog(catalog_name: str | None) -> bool: ... diff --git a/daft/session.py b/daft/session.py index 9ba4f315f6..9a75076226 100644 --- a/daft/session.py +++ b/daft/session.py @@ -1,6 +1,8 @@ from __future__ import annotations +from daft.catalog import Catalog, Identifier, Namespace, Table, TableSource from daft.daft import PySession +from daft.dataframe import DataFrame class Session: @@ -35,6 +37,121 @@ def _from_env() -> Session: # todo session builders, raise if DAFT_SESSION=0 return Session.empty() + ### + # exec + ### + + def exec(self, input: str) -> DataFrame: + return self._session.exec(input) + + ### + # attach & detach + ### + + def attach(self, catalog: Catalog, alias: str) -> Catalog: + """Attaches the catalog to this session.""" + return self._session.attach(catalog, alias) + + def detach(self, catalog: str): + """Detaches the catalog from this session.""" + return self._session.detach(catalog) + + ### + # create_* + ### + + # TODO rchowell + def create_catalog(self, name: str) -> Catalog: + """Create a new catalog scoped to this session.""" + return self._session.create_catalog(name) + + # TODO rchowell + def create_namespace(self, name: str) -> Namespace: + """Create a new namespace scope to this session's current catalog.""" + return self._session.create_namespace(name) + + # TODO rchowell + def create_table(self, name: str, source: TableSource = None) -> Table: + """Creates a new table scoped to this session's current catalog and namespace.""" + return self._session.create_table(name, source) + + def create_temp_table(self, name: str, source: object = None) -> Table: + """Creates a temp table scoped to this session's lifetime.""" + return self._session.create_temp_table(name, TableSource._from_object(source)._source) + + ### + # session state + ### + + def current_catalog(self) -> Catalog: + """Returns the session's current catalog.""" + return self._session.current_catalog() + + # TODO rchowell + def current_namespace(self) -> Namespace: + """Returns the session's current namespace.""" + return self._session.current_namespace() + + ### + # get_* + ### + + def get_catalog(self, name: str) -> Catalog: + """Returns the catalog or raises an exception if it does not exist.""" + return self._session.get_catalog(name) + + # TODO rchowell + def get_namespace(self, name: str) -> Namespace: + """Returns the namespace or raises an exception if it does not exist.""" + return self._session.get_namespace(name) + + def get_table(self, name: str | Identifier) -> Table: + """Returns the table or raises an exception if it does not exist.""" + if isinstance(name, str): + name = Identifier(*name.split(".")) + return self._session.get_table(name._identifier) + + ### + # has_* + ### + + def has_catalog(self, name: str) -> bool: + return self._session.has_catalog(name) + + def has_namespace(self, name: str | Identifier) -> bool: + if isinstance(name, str): + name = Identifier(*name.split(".")) + return self._session.has_namespace(name) + + def has_table(self, name: str | Identifier) -> bool: + if isinstance(name, str): + name = Identifier(*name.split(".")) + return self._session.has_table(name) + + ### + # list_* + ### + + def list_catalogs(self, pattern: None | str = None) -> list[str]: + """Returns a list of available catalogs.""" + return self._session.list_catalogs(pattern) + + def list_tables(self, pattern: None | str = None) -> list[Identifier]: + """Returns a list of available tables.""" + return self._session.list_tables(pattern) + + ### + # set_* + ### + + def set_catalog(self, name: str): + """Set the given catalog as current_catalog or err if not exists.""" + self._session.set_catalog(name) + + def set_namespace(self, name: str): + """Set the given namespace as current_namespace or err if not exists.""" + self._session.set_namespace(name) + ### # global active session @@ -62,11 +179,31 @@ def _session() -> Session: ### +def current_catalog() -> Catalog: + """Returns the global session's current catalog.""" + return _session().current_catalog() + + def current_session() -> Session: """Returns the global context's current session.""" return _session() +### +# create_* +## + + +def create_catalog(name: str) -> Catalog: + """Creates a catalog scoped to the global session.""" + return _session().create_catalog(name) + + +def create_temp_table(name: str, source: object | None = None) -> Catalog: + """Creates a temporary table scoped to the global session.""" + return _session().create_temp_table(name, source) + + ### # set_.* (session management) ### @@ -82,3 +219,8 @@ def set_session(session: Session): # ``` global _SESSION _SESSION = session + + +def set_catalog(name: str): + """Sets the global session's current catalog.""" + _session().set_catalog(name) diff --git a/daft/sql/sql.py b/daft/sql/sql.py index 31f9016fe9..1af3251e1c 100644 --- a/daft/sql/sql.py +++ b/daft/sql/sql.py @@ -4,8 +4,8 @@ from typing import Optional from daft.api_annotations import PublicAPI +from daft.catalog import Catalog, TableSource from daft.context import get_context -from daft.daft import PyCatalog as _PyCatalog from daft.daft import sql as _sql from daft.daft import sql_expr as _sql_expr from daft.dataframe import DataFrame @@ -17,25 +17,25 @@ class SQLCatalog: """SQLCatalog is a simple map from table names to dataframes used in query planning. - EXPERIMENTAL: This features is early in development and will change. + DEPRECATED: This features is deprecated, please using Catalog.from_pydict() in daft.catalog. """ - _catalog: _PyCatalog = None # type: ignore + _catalog: Catalog = None # type: ignore def __init__(self, tables: dict) -> None: """Create a new SQLCatalog from a dictionary of table names to dataframes.""" - self._catalog = _PyCatalog.new() - for name, df in tables.items(): - self._catalog.register_table(name, df._get_current_builder()._builder) + self._catalog = Catalog.from_pydict(tables) def __str__(self) -> str: return str(self._catalog) def register_table(self, name: str, df: DataFrame): - self._catalog.register_table(name, df._get_current_builder()._builder) + """DEPRECATED: Please use daft.catalog's Catalog.create_table API.""" + self._catalog.create_table(name, TableSource._from_df(df)) def _copy_from(self, other: "SQLCatalog") -> None: - self._catalog.copy_from(other._catalog) + raise NotImplementedError() + # self._catalog.copy_from(other._catalog) @PublicAPI diff --git a/src/daft-catalog/Cargo.toml b/src/daft-catalog/Cargo.toml index 70990e0cae..3db41d9eb5 100644 --- a/src/daft-catalog/Cargo.toml +++ b/src/daft-catalog/Cargo.toml @@ -2,7 +2,6 @@ common-error = {path = "../common/error", default-features = false} daft-core = {path = "../daft-core", default-features = false} daft-logical-plan = {path = "../daft-logical-plan", default-features = false} -lazy_static = {workspace = true} pyo3 = {workspace = true, optional = true} sqlparser = {workspace = true} snafu.workspace = true diff --git a/src/daft-catalog/python-catalog/Cargo.toml b/src/daft-catalog/python-catalog/Cargo.toml deleted file mode 100644 index 1893bbd4ee..0000000000 --- a/src/daft-catalog/python-catalog/Cargo.toml +++ /dev/null @@ -1,14 +0,0 @@ -[dependencies] -daft-catalog = {path = "..", default-features = false} -daft-logical-plan = {path = "../../daft-logical-plan", default-features = false} -pyo3 = {workspace = true, optional = true} -snafu = {workspace = true} - -[features] -python = ["daft-catalog/python", "daft-logical-plan/python", "dep:pyo3"] - -[package] -description = "Python implementations of Daft DataCatalogTable (backed by a PythonCatalog abstract class)" -name = "daft-catalog-python-catalog" -edition.workspace = true -version.workspace = true diff --git a/src/daft-catalog/python-catalog/src/lib.rs b/src/daft-catalog/python-catalog/src/lib.rs deleted file mode 100644 index a098fa319b..0000000000 --- a/src/daft-catalog/python-catalog/src/lib.rs +++ /dev/null @@ -1,2 +0,0 @@ -#[cfg(feature = "python")] -pub mod python; diff --git a/src/daft-catalog/python-catalog/src/python.rs b/src/daft-catalog/python-catalog/src/python.rs deleted file mode 100644 index 90d421c9b3..0000000000 --- a/src/daft-catalog/python-catalog/src/python.rs +++ /dev/null @@ -1,172 +0,0 @@ -use std::sync::Arc; - -use daft_catalog::{ - error::{Error as DaftCatalogError, Result}, - DataCatalog, DataCatalogTable, -}; -use daft_logical_plan::{LogicalPlanBuilder, PyLogicalPlanBuilder}; -use pyo3::prelude::*; -use snafu::{ResultExt, Snafu}; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("Error while listing tables: {}", source))] - ListTables { source: pyo3::PyErr }, - - #[snafu(display("Error while getting table {}: {}", table_name, source))] - GetTable { - source: pyo3::PyErr, - table_name: String, - }, - - #[snafu(display("Error while reading table {} into Daft: {}", table_name, source))] - ReadTable { - source: pyo3::PyErr, - table_name: String, - }, -} - -impl From for DaftCatalogError { - fn from(value: Error) -> Self { - match value { - Error::ListTables { source } => DaftCatalogError::PythonError { - source, - context: "listing tables".to_string(), - }, - Error::GetTable { source, table_name } => DaftCatalogError::PythonError { - source, - context: format!("getting table `{}`", table_name), - }, - Error::ReadTable { source, table_name } => DaftCatalogError::PythonError { - source, - context: format!("reading table `{}`", table_name), - }, - } - } -} - -/// Wrapper around a `daft.catalog.python_catalog.PythonCatalogTable` -pub struct PythonTable { - table_name: String, - table_pyobj: PyObject, -} - -impl PythonTable { - pub fn new(table_name: String, table_pyobj: PyObject) -> Self { - Self { - table_name, - table_pyobj, - } - } -} - -impl DataCatalogTable for PythonTable { - fn to_logical_plan_builder(&self) -> daft_catalog::error::Result { - Python::with_gil(|py| { - let dataframe = self - .table_pyobj - .bind(py) - .getattr("to_dataframe") - .and_then(|to_dataframe_method| to_dataframe_method.call0()) - .with_context(|_| ReadTableSnafu { - table_name: self.table_name.clone(), - })?; - - let py_logical_plan_builder = dataframe - .getattr("_builder") - .unwrap() - .getattr("_builder") - .unwrap(); - let py_logical_plan_builder = py_logical_plan_builder - .downcast::() - .unwrap(); - Ok(py_logical_plan_builder.borrow().builder.clone()) - }) - } -} - -/// Wrapper around a `daft.catalog.python_catalog.PythonCatalog` -#[derive(Debug)] -pub struct PythonCatalog { - python_catalog_pyobj: PyObject, -} - -impl PythonCatalog { - pub fn new(python_catalog_pyobj: PyObject) -> Self { - Self { - python_catalog_pyobj, - } - } -} - -impl DataCatalog for PythonCatalog { - fn list_tables(&self, prefix: &str) -> Result> { - Python::with_gil(|py| { - let python_catalog = self.python_catalog_pyobj.bind(py); - - Ok(python_catalog - .getattr("list_tables") - .and_then(|list_tables_method| list_tables_method.call1((prefix,))) - .and_then(|tables| tables.extract::>()) - .with_context(|_| ListTablesSnafu)?) - }) - } - - fn get_table(&self, name: &str) -> Result>> { - Python::with_gil(|py| { - let python_catalog = self.python_catalog_pyobj.bind(py); - let list_tables_method = - python_catalog - .getattr("load_table") - .with_context(|_| GetTableSnafu { - table_name: name.to_string(), - })?; - let table = list_tables_method - .call1((name,)) - .with_context(|_| GetTableSnafu { - table_name: name.to_string(), - })?; - let python_table = PythonTable::new(name.to_string(), table.unbind()); - Ok(Some(Box::new(python_table) as Box)) - }) - } -} - -/// Registers an PythonCatalog instance -/// -/// This function registers a Python-based catalog with the Daft catalog system. -/// -/// Args: -/// python_catalog_obj (PyObject): The Python catalog object to register. -/// catalog_name (Optional[str]): The name to give the catalog. If None, a default name will be used. -/// -/// Returns: -/// str: The name of the registered catalog (always "default" in the current implementation). -/// -/// Raises: -/// PyErr: If there's an error during the registration process. -/// -/// Example: -/// >>> import daft -/// >>> from my_python_catalog import MyPythonCatalog -/// >>> python_catalog = MyPythonCatalog() -/// >>> daft.register_python_catalog(python_catalog, "my_catalog") -/// 'default' -#[pyfunction] -#[pyo3( - name = "register_python_catalog", - signature = (python_catalog_obj, catalog_name=None) -)] -pub fn py_register_python_catalog( - python_catalog_obj: PyObject, - catalog_name: Option<&str>, -) -> PyResult { - let catalog = PythonCatalog::new(python_catalog_obj); - daft_catalog::global_catalog::register_catalog(Arc::new(catalog), catalog_name); - Ok("default".to_string()) -} - -pub fn register_modules(parent: &Bound) -> PyResult<()> { - parent.add_wrapped(wrap_pyfunction!(py_register_python_catalog))?; - Ok(()) -} diff --git a/src/daft-catalog/src/bindings.rs b/src/daft-catalog/src/bindings.rs new file mode 100644 index 0000000000..3ba8fa8624 --- /dev/null +++ b/src/daft-catalog/src/bindings.rs @@ -0,0 +1,55 @@ +use std::collections::HashMap; + +/// Bindings +/// +/// Notes: +/// - Currently using Clone because all references are arcs. +/// - This is intentionally lightweight and everything is exact-case. +/// - All APIs are non-fallible because callers determine what is an error. +/// - It does not necessarily have map semantics. +/// - Callers are responsible for case-normalization hence String, &str. +/// - Intentionally using String and &str rather than Into and AsRef. +#[derive(Debug)] +pub struct Bindings(HashMap); + +impl Bindings { + /// Creates an empty catalog provider. + pub fn empty() -> Self { + Self(HashMap::new()) + } + + /// Inserts a new binding with ownership. + pub fn insert(&mut self, name: String, object: T) { + self.0.insert(name, object); + } + + /// Removes the binding if it exists. + pub fn remove(&mut self, name: &str) { + self.0.remove(name); + } + + /// Returns true if the binding exists. + pub fn exists(&self, name: &str) -> bool { + self.0.contains_key(name) + } + + /// Get an object reference by name. + pub fn get(&self, name: &str) -> Option<&T> { + self.0.get(name) + } + + /// List all objects matching the pattern. + pub fn list(&self, pattern: Option<&str>) -> Vec { + self.0 + .keys() + .map(|k| k.as_str()) + .filter(|k| pattern.is_none() || k.contains(pattern.unwrap_or(""))) + .map(|k| k.to_string()) + .collect() + } + + /// Returns true iff there are no bindings. + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } +} diff --git a/src/daft-catalog/src/catalog.rs b/src/daft-catalog/src/catalog.rs index e9354a3da2..0e2bb0e8c6 100644 --- a/src/daft-catalog/src/catalog.rs +++ b/src/daft-catalog/src/catalog.rs @@ -1,43 +1,26 @@ -use std::{collections::HashMap, sync::Arc}; +use std::sync::Arc; -use crate::DataCatalog; +use crate::{bindings::Bindings, error::Result, Identifier, Table}; -/// Catalogs is a collection of referenceable catalogs (glorified map). -/// -/// Notes: -/// - This is intentionally lightweight and everything is exact-case. -/// - All APIs are non-fallible because callers determine what is an error. -/// - It does not necessarily have map semantics. -/// - Callers are responsible for case-normalization hence String, &str. -/// - Intentionally using String and &str rather than Into and AsRef. -#[derive(Debug)] -pub struct Catalogs(HashMap>); +/// Catalog implementation reference. +pub type CatalogRef = Arc; -impl Catalogs { - /// Creates an empty catalogs collection - pub fn empty() -> Catalogs { - Self(HashMap::new()) - } - - /// Attaches a catalog to this catalog collection. - pub fn attach(&mut self, name: String, catalog: Arc) { - self.0.insert(name, catalog); - } +/// CatalogProvider is a collection of referenceable catalogs. +pub type CatalogProvider = Bindings; - /// Detaches a catalog from this catalog collection. - pub fn detach(&mut self, name: &str) { - self.0.remove(name); - } +/// A catalog provides object metadata such as namespaces, tables, and functions. +pub trait Catalog: Sync + Send + std::fmt::Debug { + /// Returns the catalog name. + fn name(&self) -> String; - /// Get the catalog by name. - pub fn get(&self, name: &str) -> Option> { - self.0.get(name).map(Arc::clone) - } + /// Returns the given table if it exists. + fn get_table(&self, name: &Identifier) -> Result>>; - /// Returns true iff a catalog with the given name exists (exact-case). - pub fn exists(&self, name: &str) -> bool { - self.0.contains_key(name) + /// Leverage dynamic dispatch to return the inner object for a PyCatalogImpl (generics?) + #[cfg(feature = "python")] + fn to_py(&self, _: pyo3::Python<'_>) -> pyo3::PyObject { + panic!( + "missing to_py implementation, consider PyCatalog(self) as the blanket implementation" + ) } } - -// TODO Catalog trait for implementations. diff --git a/src/daft-catalog/src/data_catalog.rs b/src/daft-catalog/src/data_catalog.rs deleted file mode 100644 index 573de5cf93..0000000000 --- a/src/daft-catalog/src/data_catalog.rs +++ /dev/null @@ -1,13 +0,0 @@ -use crate::{data_catalog_table::DataCatalogTable, error::Result}; - -/// DataCatalog is a catalog of data sources -/// -/// It allows registering and retrieving data sources, as well as querying their schemas. -/// The catalog is used by the query planner to resolve table references in queries. -pub trait DataCatalog: Sync + Send + std::fmt::Debug { - /// Lists the fully-qualified names of tables in the catalog with the specified prefix - fn list_tables(&self, prefix: &str) -> Result>; - - /// Retrieves a [`DataCatalogTable`] from this [`DataCatalog`] if it exists - fn get_table(&self, name: &str) -> Result>>; -} diff --git a/src/daft-catalog/src/data_catalog_table.rs b/src/daft-catalog/src/data_catalog_table.rs deleted file mode 100644 index 05fd265df2..0000000000 --- a/src/daft-catalog/src/data_catalog_table.rs +++ /dev/null @@ -1,11 +0,0 @@ -use daft_logical_plan::LogicalPlanBuilder; - -use crate::error; - -/// A Table in a Data Catalog -/// -/// This is a trait because there are many different implementations of this, for example -/// Iceberg, DeltaLake, Hive and more. -pub trait DataCatalogTable { - fn to_logical_plan_builder(&self) -> error::Result; -} diff --git a/src/daft-catalog/src/lib.rs b/src/daft-catalog/src/lib.rs index c056f5881b..880bc6e79f 100644 --- a/src/daft-catalog/src/lib.rs +++ b/src/daft-catalog/src/lib.rs @@ -1,226 +1,16 @@ -pub mod catalog; -mod data_catalog; -mod data_catalog_table; +mod bindings; +mod catalog; pub mod error; mod identifier; -// Export public-facing traits -use std::{collections::HashMap, default, sync::Arc}; +mod table; -use daft_logical_plan::LogicalPlanBuilder; -pub use data_catalog::DataCatalog; -pub use data_catalog_table::DataCatalogTable; +pub use bindings::*; +pub use catalog::*; +pub use identifier::*; +pub use table::*; #[cfg(feature = "python")] pub mod python; -use error::{Error, Result}; -pub use identifier::*; - -pub mod global_catalog { - use std::sync::{Arc, RwLock}; - - use lazy_static::lazy_static; - - use crate::{DaftCatalog, DataCatalog}; - - lazy_static! { - pub(crate) static ref GLOBAL_DAFT_META_CATALOG: RwLock = - RwLock::new(DaftCatalog::new_from_env()); - } - - /// Register a DataCatalog with the global DaftMetaCatalog - pub fn register_catalog(catalog: Arc, name: Option<&str>) { - GLOBAL_DAFT_META_CATALOG - .write() - .unwrap() - .register_catalog(catalog, name); - } - - /// Unregisters a catalog with the global DaftMetaCatalog - pub fn unregister_catalog(name: Option<&str>) -> bool { - GLOBAL_DAFT_META_CATALOG - .write() - .unwrap() - .unregister_catalog(name) - } -} - -/// Name of the default catalog -static DEFAULT_CATALOG_NAME: &str = "default"; - -/// The [`DaftMetaCatalog`] is a catalog of [`DataCatalog`] implementations -/// -/// Users of Daft can register various [`DataCatalog`] with Daft, enabling -/// discovery of tables across various [`DataCatalog`] implementations. -#[derive(Debug, Clone, Default)] -pub struct DaftCatalog { - /// Map of catalog names to the DataCatalog impls. - /// - /// NOTE: The default catalog is always named "default" - data_catalogs: HashMap>, - - /// LogicalPlans that were "named" and registered with Daft - named_tables: HashMap, -} - -impl DaftCatalog { - /// Create a `DaftMetaCatalog` from the current environment - pub fn new_from_env() -> Self { - // TODO: Parse a YAML file to produce the catalog - DaftCatalog { - data_catalogs: default::Default::default(), - named_tables: default::Default::default(), - } - } - - /// Register a new [`DataCatalog`] with the `DaftMetaCatalog`. - /// - /// # Arguments - /// - /// * `catalog` - The [`DataCatalog`] to register. - pub fn register_catalog(&mut self, catalog: Arc, name: Option<&str>) { - let name = name.unwrap_or(DEFAULT_CATALOG_NAME); - self.data_catalogs.insert(name.to_string(), catalog); - } - - /// Unregister a [`DataCatalog`] from the `DaftMetaCatalog`. - /// - /// # Arguments - /// - /// * `name` - The name of the catalog to unregister. If None, the default catalog will be unregistered. - /// - /// # Returns - /// - /// Returns `true` if a catalog was successfully unregistered, `false` otherwise. - pub fn unregister_catalog(&mut self, name: Option<&str>) -> bool { - let name = name.unwrap_or(DEFAULT_CATALOG_NAME); - self.data_catalogs.remove(name).is_some() - } - - /// Registers a LogicalPlan with a name in the DaftMetaCatalog - pub fn register_table( - &mut self, - name: &str, - view: impl Into, - ) -> Result<()> { - // TODO this API is being removed, for now preserve the exact name as if it were delimited. - self.named_tables.insert(name.into(), view.into()); - Ok(()) - } - - /// Check if a named table is registered in the DaftCatalog - pub fn contains_table(&self, name: &str) -> bool { - self.named_tables.contains_key(name) - } - - /// Provides high-level functionality for reading a table of data against a [`DaftMetaCatalog`] - /// - /// Resolves the provided table_identifier against the catalog: - /// - /// 1. If there is an exact match for the provided `table_identifier` in the catalog's registered named tables, immediately return the exact match - /// 2. If the [`DaftMetaCatalog`] has a default catalog, we will attempt to resolve the `table_identifier` against the default catalog - /// 3. If the `table_identifier` is hierarchical (delimited by "."), use the first component as the Data Catalog name and resolve the rest of the components against - /// the selected Data Catalog - pub fn read_table(&self, table_identifier: &str) -> error::Result { - // If the name is an exact match with a registered view, return it. - if let Some(view) = self.named_tables.get(table_identifier) { - return Ok(view.clone()); - } - - let mut searched_catalog_name = "default"; - let mut searched_table_name = table_identifier; - - // Check the default catalog for a match - if let Some(default_data_catalog) = self.data_catalogs.get(DEFAULT_CATALOG_NAME) { - if let Some(tbl) = default_data_catalog.get_table(table_identifier)? { - return tbl.as_ref().to_logical_plan_builder(); - } - } - - // Try to parse the catalog name from the provided table identifier by taking the first segment, split by '.' - if let Some((catalog_name, table_name)) = table_identifier.split_once('.') { - if let Some(data_catalog) = self.data_catalogs.get(catalog_name) { - searched_catalog_name = catalog_name; - searched_table_name = table_name; - if let Some(tbl) = data_catalog.get_table(table_name)? { - return tbl.as_ref().to_logical_plan_builder(); - } - } - } - - // Return the error containing the last catalog/table pairing that we attempted to search on - Err(Error::TableNotFound { - catalog_name: searched_catalog_name.to_string(), - table_id: searched_table_name.to_string(), - }) - } - - /// Copy from another catalog, using tables from other in case of conflict - pub fn copy_from(&mut self, other: &Self) { - for (name, plan) in &other.named_tables { - self.named_tables.insert(name.clone(), plan.clone()); - } - for (name, catalog) in &other.data_catalogs { - self.data_catalogs.insert(name.clone(), catalog.clone()); - } - } - - /// TODO remove py register and read methods are moved to session - /// I cannot remove DaftMetaCatalog until I invert the dependency - /// so that the current register_ methods use the session rather than the catalog. - pub fn into_catalog_map(self) -> HashMap> { - self.data_catalogs - } -} - -#[cfg(test)] -mod tests { - use std::sync::Arc; - - use daft_core::prelude::*; - use daft_logical_plan::{ - ops::Source, source_info::PlaceHolderInfo, ClusteringSpec, LogicalPlan, LogicalPlanRef, - SourceInfo, - }; - - use super::*; - - fn mock_plan() -> LogicalPlanRef { - let schema = Arc::new( - Schema::new(vec![ - Field::new("text", DataType::Utf8), - Field::new("id", DataType::Int32), - ]) - .unwrap(), - ); - LogicalPlan::Source(Source::new( - schema.clone(), - Arc::new(SourceInfo::PlaceHolder(PlaceHolderInfo { - source_schema: schema, - clustering_spec: Arc::new(ClusteringSpec::unknown()), - source_id: 0, - })), - )) - .arced() - } - - #[test] - fn test_register_and_unregister_named_table() { - let mut catalog = DaftCatalog::new_from_env(); - let plan = LogicalPlanBuilder::from(mock_plan()); - - // Register a table - assert!(catalog.register_table("test_table", plan.clone()).is_ok()); - } - - #[test] - fn test_read_registered_table() { - let mut catalog = DaftCatalog::new_from_env(); - let plan = LogicalPlanBuilder::from(mock_plan()); - - catalog.register_table("test_table", plan).unwrap(); - - assert!(catalog.read_table("test_table").is_ok()); - assert!(catalog.read_table("non_existent_table").is_err()); - } -} +#[cfg(feature = "python")] +pub use python::register_modules; diff --git a/src/daft-catalog/src/python.rs b/src/daft-catalog/src/python.rs index 1bbea213c7..5198537dd0 100644 --- a/src/daft-catalog/src/python.rs +++ b/src/daft-catalog/src/python.rs @@ -1,94 +1,39 @@ -use daft_logical_plan::PyLogicalPlanBuilder; +use daft_core::{prelude::SchemaRef, python::PySchema}; +use daft_logical_plan::{LogicalPlanRef, PyLogicalPlanBuilder}; use pyo3::{exceptions::PyIndexError, prelude::*}; -use crate::{global_catalog, identifier::Identifier}; - -/// Read a table from the specified `DaftMetaCatalog`. -/// -/// This function reads a table from a `DaftMetaCatalog` and returns a PyLogicalPlanBuilder -/// object representing the plan required to read the table. -/// -/// The provided `table_identifier` can be: -/// -/// 1. Name of a registered dataframe/SQL view (manually registered using `DaftMetaCatalog.register_view`) -/// 2. Name of a table within the default catalog (without inputting the catalog name) for example: `"my.table.name"` -/// 3. Name of a fully-qualified table path with the catalog name for example: `"my_catalog.my.table.name"` -/// -/// Args: -/// table_identifier (str): The identifier of the table to read. -/// -/// Returns: -/// PyLogicalPlanBuilder: A PyLogicalPlanBuilder object representing the table's data. -/// -/// Raises: -/// DaftError: If the table cannot be read or the specified table identifier is not found. -/// -/// Example: -/// >>> import daft -/// >>> df = daft.read_table("foo") -#[pyfunction] -#[pyo3(name = "read_table")] -fn py_read_table(table_identifier: &str) -> PyResult { - let logical_plan_builder = global_catalog::GLOBAL_DAFT_META_CATALOG - .read() - .unwrap() - .read_table(table_identifier)?; - Ok(PyLogicalPlanBuilder::new(logical_plan_builder)) -} +use crate::{error::Result, Catalog, CatalogRef, Identifier, Table, TableRef, TableSource}; + +/// PyCatalog implements the Catalog ABC for some Catalog trait impl (rust->py). +#[pyclass] +pub struct PyCatalog(CatalogRef); -/// Register a table with the global catalog. -/// -/// This function registers a table with the global `DaftMetaCatalog` using the provided -/// table identifier and logical plan. -/// -/// Args: -/// table_identifier (str): The identifier to use for the registered table. -/// logical_plan (PyLogicalPlanBuilder): The logical plan representing the table's data. -/// -/// Returns: -/// str: The table identifier used for registration. -/// -/// Example: -/// >>> import daft -/// >>> df = daft.read_csv("data.csv") -/// >>> daft.register_table("my_table", df) -#[pyfunction] -#[pyo3(name = "register_table")] -fn py_register_table( - table_identifier: &str, - logical_plan: &PyLogicalPlanBuilder, -) -> PyResult { - global_catalog::GLOBAL_DAFT_META_CATALOG - .write() - .unwrap() - .register_table(table_identifier, logical_plan.builder.clone())?; - Ok(table_identifier.to_string()) +#[pymethods] +impl PyCatalog { + fn name(&self) -> String { + self.0.name() + } } -/// Unregisters a catalog from the Daft catalog system -/// -/// This function removes a previously registered catalog from the Daft catalog system. -/// -/// Args: -/// catalog_name (Optional[str]): The name of the catalog to unregister. If None, the default catalog will be unregistered. -/// -/// Returns: -/// bool: True if a catalog was successfully unregistered, False otherwise. -/// -/// Example: -/// >>> import daft -/// >>> daft.unregister_catalog("my_catalog") -/// True -#[pyfunction] -#[pyo3( - name = "unregister_catalog", - signature = (catalog_name=None) -)] -pub fn py_unregister_catalog(catalog_name: Option<&str>) -> bool { - crate::global_catalog::unregister_catalog(catalog_name) +/// PyCatalogImpl implements the Catalog trait for some Catalog ABC impl (py->rust). +#[derive(Debug)] +pub struct PyCatalogImpl(PyObject); + +impl Catalog for PyCatalogImpl { + fn name(&self) -> String { + todo!() + } + + fn get_table(&self, _name: &Identifier) -> Result>> { + todo!() + } + + fn to_py(&self, py: Python<'_>) -> PyObject { + self.0.extract(py).expect("failed to extract PyObject") + } } -/// Bridge from identifier.py to identifier.rs +/// PyIdentifier maps identifier.py to identifier.rs #[pyclass(sequence)] #[derive(Debug, Clone)] pub struct PyIdentifier(Identifier); @@ -136,19 +81,95 @@ impl PyIdentifier { } } +/// PyTableSource wraps either a schema or dataframe. +#[pyclass] +pub struct PyTableSource(TableSource); + +impl From for PyTableSource { + fn from(source: TableSource) -> Self { + Self(source) + } +} + +/// PyTableSource -> TableSource +impl AsRef for PyTableSource { + fn as_ref(&self) -> &TableSource { + &self.0 + } +} + +#[pymethods] +impl PyTableSource { + #[staticmethod] + pub fn from_schema(schema: PySchema) -> PyTableSource { + Self(TableSource::Schema(schema.schema)) + } + + #[staticmethod] + pub fn from_view(view: &PyLogicalPlanBuilder) -> PyTableSource { + Self(TableSource::View(view.builder.build())) + } +} + +/// PyTable implements the Table ABC for some Table trait impl (rust->py). +#[pyclass] +pub struct PyTable(TableRef); + +impl PyTable { + pub fn new(table: TableRef) -> Self { + Self(table) + } +} + +#[pymethods] +impl PyTable {} + +/// PyTableImpl implements the Table trait for some Table ABC impl (py->rust). +#[derive(Debug)] +pub struct PyTableImpl(PyObject); + +impl Table for PyTableImpl { + fn get_schema(&self) -> SchemaRef { + todo!() + } + + fn get_logical_plan(&self) -> Result { + todo!() + } + + fn to_py(&self, py: Python<'_>) -> PyResult { + self.0.extract(py) + } +} + +impl From for PyCatalog { + fn from(catalog: CatalogRef) -> Self { + Self(catalog) + } +} + +impl From for PyCatalogImpl { + fn from(obj: PyObject) -> Self { + Self(obj) + } +} + impl From for PyIdentifier { fn from(value: Identifier) -> Self { Self(value) } } -/// Defines the python daft. -pub fn register_modules<'py>(parent: &Bound<'py, PyModule>) -> PyResult> { - let module = PyModule::new(parent.py(), "catalog")?; - module.add_class::()?; - module.add_wrapped(wrap_pyfunction!(py_read_table))?; - module.add_wrapped(wrap_pyfunction!(py_register_table))?; - module.add_wrapped(wrap_pyfunction!(py_unregister_catalog))?; - parent.add_submodule(&module)?; - Ok(module) +impl AsRef for PyIdentifier { + fn as_ref(&self) -> &Identifier { + &self.0 + } +} + +pub fn register_modules(parent: &Bound) -> PyResult<()> { + parent.add_class::()?; + parent.add_class::()?; + parent.add_class::()?; + parent.add_class::()?; + Ok(()) } diff --git a/src/daft-catalog/src/table.rs b/src/daft-catalog/src/table.rs new file mode 100644 index 0000000000..f6bdaabdb9 --- /dev/null +++ b/src/daft-catalog/src/table.rs @@ -0,0 +1,94 @@ +use std::sync::Arc; + +use daft_core::prelude::SchemaRef; +use daft_logical_plan::{LogicalPlanBuilder, LogicalPlanRef}; + +use crate::{bindings::Bindings, error::Result}; + +/// Table implementation reference. +pub type TableRef = Arc; + +/// Table sources for now are just references. +#[derive(Debug, Clone)] +pub enum TableSource { + /// Table source for CREATE TABLE t () + Schema(SchemaRef), + /// Table source for CREATE TABLE t AS + View(LogicalPlanRef), +} + +impl From for TableSource { + fn from(schema: SchemaRef) -> Self { + TableSource::Schema(schema) + } +} + +impl From for TableSource { + fn from(view: LogicalPlanRef) -> Self { + TableSource::View(view) + } +} + +impl From for TableSource { + fn from(view: LogicalPlanBuilder) -> Self { + TableSource::View(view.build()) + } +} + +/// TableProvider is a collection of referenceable tables. +pub type TableProvider = Bindings; + +/// TODO consider moving out to daft-table, but this isn't necessary or helpful right now. +pub trait Table: Sync + Send + std::fmt::Debug { + /// Returns the table schema + fn get_schema(&self) -> SchemaRef; + + /// Returns a logical plan for this table. + fn get_logical_plan(&self) -> Result; + + /// Leverage dynamic dispatch to return the inner object for a PyTableImpl (generics?) + #[cfg(feature = "python")] + fn to_py(&self, _: pyo3::Python<'_>) -> pyo3::PyResult { + panic!("missing to_py implementation, consider PyTable(self) as the blanket implementation") + } +} + +/// View provides an implementation of Table over a DataFrame. +#[derive(Debug, Clone)] +pub struct View(LogicalPlanRef); + +impl From for View { + fn from(plan: LogicalPlanRef) -> Self { + Self(plan) + } +} + +impl View { + /// Remove me? + pub fn arced(self) -> Arc { + Arc::new(self) + } +} + +impl Table for View { + /// Returns a reference to the inner plan's schema + fn get_schema(&self) -> SchemaRef { + self.0.schema().clone() + } + + /// Returns a reference to the inner plan + fn get_logical_plan(&self) -> Result { + Ok(self.0.clone()) + } + + /// This is a little ugly .. it creates a PyObject which implements a python Table + #[cfg(feature = "python")] + fn to_py(&self, py: pyo3::Python<'_>) -> pyo3::PyResult { + use pyo3::{types::PyAnyMethods, IntoPyObject}; + + use crate::python::PyTable; + PyTable::new(self.clone().arced()) + .into_pyobject(py)? + .extract() + } +} diff --git a/src/daft-connect/src/execute.rs b/src/daft-connect/src/execute.rs index 8030982cb4..f1302282a0 100644 --- a/src/daft-connect/src/execute.rs +++ b/src/daft-connect/src/execute.rs @@ -2,7 +2,6 @@ use std::{future::ready, rc::Rc, sync::Arc}; use common_error::DaftResult; use common_file_formats::FileFormat; -use daft_catalog::Identifier; use daft_context::get_context; use daft_dsl::LiteralValue; use daft_logical_plan::LogicalPlanBuilder; @@ -243,7 +242,7 @@ impl ConnectSession { let session = self.session_mut(); session - .create_table(Identifier::simple(name), input) + .create_temp_table(&name, &input.into()) .map_err(|e| { Status::internal( textwrap::wrap(&format!("Error in Daft server: {e}"), 120).join("\n"), diff --git a/src/daft-session/src/options.rs b/src/daft-session/src/options.rs index b8967cbf56..a4020cde40 100644 --- a/src/daft-session/src/options.rs +++ b/src/daft-session/src/options.rs @@ -7,15 +7,15 @@ pub(crate) const DAFT_SESSION_DEFAULT_SCHEMA: &str = "default"; #[derive(Debug)] pub(crate) struct Options { - _curr_catalog: String, - _curr_schema: String, + pub curr_catalog: String, + pub curr_schema: String, } impl Default for Options { fn default() -> Self { Self { - _curr_catalog: DAFT_SESSION_DEFAULT_CATALOG.to_string(), - _curr_schema: DAFT_SESSION_DEFAULT_SCHEMA.to_string(), + curr_catalog: DAFT_SESSION_DEFAULT_CATALOG.to_string(), + curr_schema: DAFT_SESSION_DEFAULT_SCHEMA.to_string(), } } } diff --git a/src/daft-session/src/python.rs b/src/daft-session/src/python.rs index e0b1963036..e9d06e3861 100644 --- a/src/daft-session/src/python.rs +++ b/src/daft-session/src/python.rs @@ -1,9 +1,14 @@ +use std::sync::Arc; + +use daft_catalog::{ + python::{PyCatalog, PyCatalogImpl, PyIdentifier, PyTable, PyTableSource}, + Namespace, +}; use pyo3::prelude::*; use crate::Session; #[pyclass] -#[allow(dead_code)] pub struct PySession(Session); #[pymethods] @@ -12,6 +17,91 @@ impl PySession { pub fn empty() -> Self { Self(Session::empty()) } + + pub fn exec(&self, input: &str) -> PyResult<()> { + todo!() + } + + pub fn current_catalog(&self) -> PyResult<()> { + todo!() + } + + pub fn current_namespace(&self) -> PyResult { + todo!() + } + + pub fn attach(&self, catalog: PyObject, alias: String) -> PyResult<()> { + Ok(self + .0 + .attach(Arc::new(PyCatalogImpl::from(catalog)), alias)?) + } + + pub fn detach(&self, catalog: &str) -> PyResult<()> { + Ok(self.0.detach(catalog)?) + } + + pub fn create_catalog(&self, name: &str) -> PyResult { + todo!() + } + + pub fn create_namespace(&self, name: &str) -> Namespace { + todo!() + } + + pub fn create_table(&self, name: &str, source: &PyTableSource) -> PyResult<()> { + todo!() + } + + pub fn create_temp_table(&self, name: &str, source: &PyTableSource) -> PyResult { + Python::with_gil(|py| { + let table = self.0.create_temp_table(name, source.as_ref())?; + let table = table.to_py(py)?; + Ok(table) + }) + } + + pub fn get_catalog(&self, name: &str) -> PyResult { + Python::with_gil(|py| { + let catalog = self.0.get_catalog(name)?; + let catalog = catalog.to_py(py); + Ok(catalog) + }) + } + + pub fn get_namespace(&self, name: &str) -> Namespace { + todo!() + } + + pub fn get_table(&self, name: &PyIdentifier) -> PyResult { + Python::with_gil(|py| { + let table = self.0.get_table(name.as_ref())?; + let table = table.to_py(py)?; + Ok(table) + }) + } + + #[pyo3(signature = (pattern=None))] + pub fn list_catalogs(&self, pattern: Option<&str>) -> PyResult> { + Ok(self.0.list_catalogs(pattern)?) + } + + #[pyo3(signature = (pattern=None))] + pub fn list_namespaces(&self, pattern: Option<&str>) -> PyResult<()> { + todo!() + } + + #[pyo3(signature = (pattern=None))] + pub fn list_tables(&self, pattern: Option<&str>) -> PyResult> { + Ok(self.0.list_tables(pattern)?) + } + + pub fn set_catalog(&self, name: &str) -> PyResult<()> { + Ok(self.0.set_catalog(name)?) + } + + pub fn set_namespace(&self, name: &PyIdentifier) { + todo!() + } } pub fn register_modules(parent: &Bound) -> PyResult<()> { diff --git a/src/daft-session/src/session.rs b/src/daft-session/src/session.rs index d3f084ffb4..82d59494b4 100644 --- a/src/daft-session/src/session.rs +++ b/src/daft-session/src/session.rs @@ -3,7 +3,10 @@ use std::{ sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard}, }; -use daft_catalog::{catalog::Catalogs, DataCatalog, Identifier}; +use daft_catalog::{ + Bindings, Catalog, CatalogProvider, CatalogRef, Identifier, Table, TableProvider, TableRef, + TableSource, View, +}; use daft_logical_plan::LogicalPlanBuilder; use uuid::Uuid; @@ -25,12 +28,12 @@ struct SessionState { /// Session identifier _id: String, /// Session options i.e. curr_catalog and curr_schema. - _options: Options, - /// References to the available catalogs. - catalogs: Catalogs, + options: Options, + /// Bindings for the attached catalogs. + catalogs: Bindings, + /// Bindings for the temporary tables. + tables: Bindings, // TODO execution context - // TODO temporary! tables come from a catalog, not here!! - tables: HashMap, // TODO identifier matcher for case-insensitive matching } @@ -39,9 +42,9 @@ impl Session { pub fn empty() -> Self { let state = SessionState { _id: Uuid::new_v4().to_string(), - _options: Options::default(), - catalogs: Catalogs::empty(), - tables: HashMap::new(), + options: Options::default(), + catalogs: Bindings::empty(), + tables: Bindings::empty(), }; let state = RwLock::new(state); let state = Arc::new(state); @@ -59,39 +62,65 @@ impl Session { } /// Attaches a catalog to this session, err if already exists. - pub fn attach(&self, name: String, catalog: Arc) -> Result<()> { - if self.state().catalogs.exists(&name) { - obj_already_exists_err!("Catalog", &name.into()) + pub fn attach(&self, catalog: CatalogRef, alias: String) -> Result<()> { + if self.state().catalogs.exists(&alias) { + obj_already_exists_err!("Catalog", &alias.into()) } - self.state_mut().catalogs.attach(name, catalog); + self.state_mut().catalogs.insert(alias, catalog); Ok(()) } /// Detaches a catalog from this session, err if does not exist. - pub fn detach(&self, name: &str) -> Result<()> { - if self.state().catalogs.exists(name) { - obj_not_found_err!("Catalog", &name.into()) + pub fn detach(&self, catalog: &str) -> Result<()> { + if !self.state().catalogs.exists(catalog) { + obj_not_found_err!("Catalog", &catalog.into()) } - self.state_mut().catalogs.detach(name); + self.state_mut().catalogs.remove(catalog); Ok(()) } - /// Creates a table backed by the view - /// TODO support other table sources. - pub fn create_table( - &self, - name: Identifier, - view: impl Into, - ) -> Result<()> { - if name.has_namespace() { - unsupported_err!("Creating a table with a namespace is not yet supported, Instead use a single identifier, or wrap your table name in quotes such as `\"{}\"`", name); + /// Creates a table in the current namespace with the given source. + pub fn create_table(&self, name: Identifier, source: &TableSource) -> Result<()> { + unsupported_err!("Creating a table is not implemented, try create_temp_table.") + } + + /// Creates a temp table scoped to this session from an existing view. + pub fn create_temp_table(&self, name: &str, source: &TableSource) -> Result { + if self.state().tables.exists(name) { + obj_already_exists_err!("Temporary table", &name.into()) + } + // we don't have mutable temporary tables, only immutable views over dataframes. + let table = match source { + TableSource::Schema(_) => unsupported_err!("temporary table with schema"), + TableSource::View(plan) => View::from(plan.clone()).arced(), + }; + self.state_mut() + .tables + .insert(name.to_string(), table.clone()); + Ok(table) + } + + /// Returns the session's current catalog. + pub fn current_catalog(&self) -> Result { + self.get_catalog(&self.state().options.curr_catalog) + } + + /// Returns the session's current schema. + pub fn current_namespace(&self) -> Result<()> { + unsupported_err!("current namespace") + } + + /// Returns the catalog or an object not found error. + pub fn get_catalog(&self, name: &str) -> Result { + if let Some(catalog) = self.state().catalogs.get(name) { + Ok(catalog.clone()) + } else { + obj_not_found_err!("Catalog", &name.into()) } - self.state_mut().tables.insert(name.name, view.into()); - Ok(()) } - /// Gets a table by identifier - pub fn get_table(&self, name: &Identifier) -> Result { + /// Returns the table or an object not found error. + pub fn get_table(&self, name: &Identifier) -> Result { if name.has_namespace() { unsupported_err!("Qualified identifiers are not yet supported") } @@ -101,12 +130,36 @@ impl Session { obj_not_found_err!("Table", name) } - /// Returns true iff the session has for the given identifier + /// Returns true iff the session has access to a matching catalog. + pub fn has_catalog(&self, name: &str) -> bool { + self.state().catalogs.exists(name) + } + + /// Returns true iff the session has access to a matching table. pub fn has_table(&self, name: &Identifier) -> bool { if name.has_namespace() { return false; } - return self.state().tables.contains_key(&name.name); + return self.state().tables.exists(&name.name); + } + + /// Lists all catalogs matching the pattern. + pub fn list_catalogs(&self, pattern: Option<&str>) -> Result> { + Ok(self.state().catalogs.list(pattern)) + } + + /// Lists all tables matching the pattern. + pub fn list_tables(&self, pattern: Option<&str>) -> Result> { + Ok(self.state().tables.list(pattern)) + } + + /// Sets the current_catalog + pub fn set_catalog(&self, name: &str) -> Result<()> { + if !self.has_catalog(name) { + obj_not_found_err!("Catalog", &name.into()) + } + self.state_mut().options.curr_catalog = name.to_string(); + Ok(()) } } diff --git a/src/daft-sql/src/lib.rs b/src/daft-sql/src/lib.rs index 17efaddd78..7a89c0fd33 100644 --- a/src/daft-sql/src/lib.rs +++ b/src/daft-sql/src/lib.rs @@ -28,7 +28,7 @@ pub fn register_modules(parent: &Bound) -> PyResult<()> { mod tests { use std::sync::Arc; - use daft_catalog::Identifier; + use daft_catalog::{Table, TableSource, View}; use daft_core::prelude::*; use daft_dsl::{col, lit, Expr, OuterReferenceColumn, Subquery}; use daft_logical_plan::{ @@ -116,9 +116,9 @@ mod tests { fn planner() -> SQLPlanner<'static> { let session = Session::default(); - _ = session.create_table(Identifier::simple("tbl1"), tbl_1()); - _ = session.create_table(Identifier::simple("tbl2"), tbl_2()); - _ = session.create_table(Identifier::simple("tbl3"), tbl_3()); + _ = session.create_temp_table("tbl1", &tbl_1().into()); + _ = session.create_temp_table("tbl2", &tbl_2().into()); + _ = session.create_temp_table("tbl3", &tbl_3().into()); SQLPlanner::new(session.into()) } diff --git a/src/daft-sql/src/planner.rs b/src/daft-sql/src/planner.rs index f6f33fe39b..ecb3578978 100644 --- a/src/daft-sql/src/planner.rs +++ b/src/daft-sql/src/planner.rs @@ -201,8 +201,13 @@ impl<'a> SQLPlanner<'a> { fn get_table(&self, ident: &Identifier) -> Option { self.session() .get_table(ident) + .map(|table| { + table + .get_logical_plan() + .expect("could not create a logical plan from the table") + }) .ok() - .map(|view| Relation::new(view, ident.name.to_string())) + .map(|view| Relation::new(view.into(), ident.name.to_string())) } /// Borrow the planning session diff --git a/src/daft-sql/src/python.rs b/src/daft-sql/src/python.rs index be7efc5cbd..5a2e946f0d 100644 --- a/src/daft-sql/src/python.rs +++ b/src/daft-sql/src/python.rs @@ -1,6 +1,7 @@ use std::{collections::HashMap, sync::Arc}; use common_daft_config::PyDaftPlanningConfig; +use daft_catalog::View; use daft_dsl::python::PyExpr; use daft_logical_plan::{LogicalPlan, LogicalPlanBuilder, PyLogicalPlanBuilder}; use daft_session::Session; @@ -43,8 +44,8 @@ pub fn sql( // TODO remove once using session.sql / session.exec let session = Session::empty(); // TODO remove once session replaces PyCatalog; create all the views in this session - for (name, table) in catalog.tables { - session.create_table(name.into(), table)?; + for (name, view) in catalog.tables { + session.create_temp_table(&name, &view.into())?; } let mut planner = SQLPlanner::new(session.into()); let plan = planner.plan_sql(sql)?; diff --git a/src/lib.rs b/src/lib.rs index 6d89597a2f..c70e6cb94f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -103,6 +103,7 @@ pub mod pylib { common_resource_request::register_modules(m)?; common_file_formats::python::register_modules(m)?; common_scan_info::register_modules(m)?; + daft_catalog::register_modules(m)?; daft_connect::register_modules(m)?; daft_context::register_modules(m)?; daft_core::register_modules(m)?; @@ -120,12 +121,10 @@ pub mod pylib { daft_recordbatch::register_modules(m)?; daft_scan::register_modules(m)?; daft_scheduler::register_modules(m)?; - daft_sql::register_modules(m)?; daft_session::register_modules(m)?; - - // Register catalog module - let catalog_module = daft_catalog::python::register_modules(m)?; - daft_catalog_python_catalog::python::register_modules(&catalog_module)?; + daft_sql::register_modules(m)?; + daft_functions::register_modules(m)?; + daft_functions_json::register_modules(m)?; // Register testing module let testing_module = PyModule::new(m.py(), "testing")?; diff --git a/tests/catalog/test_catalog.py b/tests/catalog/test_catalog.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_session.py b/tests/test_session.py index 91d97b4b63..1b3e1cfb99 100644 --- a/tests/test_session.py +++ b/tests/test_session.py @@ -1,9 +1,112 @@ import daft -### -# SESSION SETUP -### +import pytest +import daft +from daft.logical.schema import Schema, Field +from daft import Catalog, Session + +""" +SESSION SETUP +""" def test_current_session_exists(): assert daft.current_session() is not None + +""" +ATTACH & DETACH +""" + +def test_attach(): + sess = Session.empty() + # + # create some 'existing' catalogs + cat1 = Catalog.empty() + cat2 = Catalog.empty() + # + # attach them.. + sess.attach(cat1, alias="cat1") + sess.attach(cat2, alias="cat2") + # + # list_catalogs + assert 2 == len(sess.list_catalogs()) + # + # get_catalog + assert sess.get_catalog("cat1") == cat1 + assert sess.get_catalog("cat2") == cat2 + # + # error! + with pytest.raises(Exception, match="already exists"): + sess.attach(cat1, alias="cat1") + +def test_detach(): + sess = Session.empty() + # + # setup. + cat1 = Catalog.empty() + cat2 = Catalog.empty() + sess.attach(cat1, alias="cat1") + sess.attach(cat2, alias="cat2") + # + # + assert 2 == len(sess.list_catalogs()) + # + # detach existing + sess.detach("cat1") + assert 1 == len(sess.list_catalogs()) + # + # error! + with pytest.raises(Exception, match="not found"): + sess.detach("cat1") + +""" +CATALOG ACTIONS +""" + +@pytest.mark.skip +def test_catalog_actions(): + sess = Session.empty() + # + # setup. + cat1 = Catalog.empty() + cat2 = Catalog.empty() + sess.attach(cat1, alias="cat1") + sess.attach(cat2, alias="cat2") + # + # current_catalog should default to first in. + assert cat1 == sess.current_catalog() + # + # set_catalog and current_catalog + sess.set_catalog("cat2") + assert cat2 == sess.current_catalog() + +""" +TABLE ACTIONS +""" + +def schema(**columns): + fields = [Field.create(name, dtype) for name, dtype in columns.items()] + return Schema._from_fields(fields) + +def test_table_actions(): + sess = Session.empty() + # + # create_temp_table + t1 = sess.create_temp_table("t1") + t2 = sess.create_temp_table("t2") + t3 = sess.create_temp_table("t3", daft.from_pydict({})) + assert t1 is not None + assert t2 is not None + assert t3 is not None + # + # get_table (todo equality since these are diff objects) + assert sess.get_table("t1") is not None + assert sess.get_table("t2") is not None + # + # err on table not found + with pytest.raises(Exception, match="not found"): + sess.get_table("t4") + # + # err on qualified identifier + with pytest.raises(Exception, match="not yet supported"): + sess.get_table("default.t1")