diff --git a/Cargo.lock b/Cargo.lock index 8918017..9d0c002 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -82,6 +82,12 @@ dependencies = [ "libc", ] +[[package]] +name = "anyhow" +version = "1.0.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519" + [[package]] name = "apache-avro" version = "0.16.0" @@ -124,9 +130,9 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa285343fba4d829d49985bdc541e3789cf6000ed0e84be7c039438df4a4e78c" +checksum = "219d05930b81663fd3b32e3bde8ce5bff3c4d23052a99f11a8fa50a3b47b2658" dependencies = [ "arrow-arith", "arrow-array", @@ -145,9 +151,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "753abd0a5290c1bcade7c6623a556f7d1659c5f4148b140b5b63ce7bd1a45705" +checksum = "0272150200c07a86a390be651abdd320a2d12e84535f0837566ca87ecd8f95e0" dependencies = [ "arrow-array", "arrow-buffer", @@ -160,9 +166,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d390feeb7f21b78ec997a4081a025baef1e2e0d6069e181939b61864c9779609" +checksum = "8010572cf8c745e242d1b632bd97bd6d4f40fefed5ed1290a8f433abaa686fea" dependencies = [ "ahash", "arrow-buffer", @@ -177,9 +183,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69615b061701bcdffbc62756bc7e85c827d5290b472b580c972ebbbf690f5aa4" +checksum = "0d0a2432f0cba5692bf4cb757469c66791394bac9ec7ce63c1afe74744c37b27" dependencies = [ "bytes", "half", @@ -188,28 +194,30 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e448e5dd2f4113bf5b74a1f26531708f5edcacc77335b7066f9398f4bcf4cdef" +checksum = "9abc10cd7995e83505cc290df9384d6e5412b207b79ce6bdff89a10505ed2cba" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "arrow-select", - "base64", + "atoi", + "base64 0.22.1", "chrono", "comfy-table", "half", "lexical-core", "num", + "ryu", ] [[package]] name = "arrow-csv" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46af72211f0712612f5b18325530b9ad1bfbdc87290d5fbfd32a7da128983781" +checksum = "95cbcba196b862270bf2a5edb75927380a7f3a163622c61d40cbba416a6305f2" dependencies = [ "arrow-array", "arrow-buffer", @@ -226,9 +234,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67d644b91a162f3ad3135ce1184d0a31c28b816a581e08f29e8e9277a574c64e" +checksum = "2742ac1f6650696ab08c88f6dd3f0eb68ce10f8c253958a18c943a68cd04aec5" dependencies = [ "arrow-buffer", "arrow-schema", @@ -238,9 +246,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03dea5e79b48de6c2e04f03f62b0afea7105be7b77d134f6c5414868feefb80d" +checksum = "a42ea853130f7e78b9b9d178cb4cd01dee0f78e64d96c2949dc0a915d6d9e19d" dependencies = [ "arrow-array", "arrow-buffer", @@ -253,9 +261,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8950719280397a47d37ac01492e3506a8a724b3fb81001900b866637a829ee0f" +checksum = "eaafb5714d4e59feae964714d724f880511500e3569cc2a94d02456b403a2a49" dependencies = [ "arrow-array", "arrow-buffer", @@ -273,9 +281,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ed9630979034077982d8e74a942b7ac228f33dd93a93b615b4d02ad60c260be" +checksum = "e3e6b61e3dc468f503181dccc2fc705bdcc5f2f146755fa5b56d0a6c5943f412" dependencies = [ "arrow-array", "arrow-buffer", @@ -288,9 +296,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "007035e17ae09c4e8993e4cb8b5b96edf0afb927cd38e2dff27189b274d83dcf" +checksum = "848ee52bb92eb459b811fb471175ea3afcf620157674c8794f539838920f9228" dependencies = [ "ahash", "arrow-array", @@ -303,15 +311,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ff3e9c01f7cd169379d269f926892d0e622a704960350d09d331be3ec9e0029" +checksum = "02d9483aaabe910c4781153ae1b6ae0393f72d9ef757d38d09d450070cf2e528" +dependencies = [ + "serde", +] [[package]] name = "arrow-select" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ce20973c1912de6514348e064829e50947e35977bb9d7fb637dc99ea9ffd78c" +checksum = "849524fa70e0e3c5ab58394c770cb8f514d0122d20de08475f7b472ed8075830" dependencies = [ "ahash", "arrow-array", @@ -323,15 +334,16 @@ dependencies = [ [[package]] name = "arrow-string" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00f3b37f2aeece31a2636d1b037dabb69ef590e03bdc7eb68519b51ec86932a7" +checksum = "9373cb5a021aee58863498c37eb484998ef13377f69989c6c5ccfbd258236cdb" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "arrow-select", + "memchr", "num", "regex", "regex-syntax", @@ -366,6 +378,15 @@ dependencies = [ "syn 2.0.60", ] +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + [[package]] name = "autocfg" version = "1.2.0" @@ -438,6 +459,29 @@ dependencies = [ "uuid", ] +[[package]] +name = "aws-sdk-dynamodb" +version = "1.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "059a3cf6c50bc79b05153a18563be062f55a8b7b3e73392aec7ba479f825fc97" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "once_cell", + "regex-lite", + "tracing", +] + [[package]] name = "aws-sdk-glue" version = "1.27.0" @@ -693,6 +737,20 @@ dependencies = [ "tracing", ] +[[package]] +name = "backoff" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1" +dependencies = [ + "futures-core", + "getrandom", + "instant", + "pin-project-lite", + "rand", + "tokio", +] + [[package]] name = "backtrace" version = "0.3.71" @@ -714,6 +772,12 @@ version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "base64-simd" version = "0.8.0" @@ -794,6 +858,12 @@ version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" +[[package]] +name = "bytemuck" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15" + [[package]] name = "byteorder" version = "1.5.0" @@ -1027,9 +1097,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "36.0.0" +version = "37.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2b360b692bf6c6d6e6b6dbaf41a3be0020daeceac0f406aed54c75331e50dbb" +checksum = "85069782056753459dc47e386219aa1fdac5b731f26c28abb8c0ffd4b7c5ab11" dependencies = [ "ahash", "apache-avro", @@ -1044,6 +1114,7 @@ dependencies = [ "chrono", "dashmap", "datafusion-common", + "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", "datafusion-functions", @@ -1086,6 +1157,7 @@ dependencies = [ "aws-types", "dashmap", "datafusion", + "deltalake", "object_store", "pest", "pest_derive", @@ -1095,9 +1167,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "36.0.0" +version = "37.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37f343ccc298f440e25aa38ff82678291a7acc24061c7370ba6c0ff5cc811412" +checksum = "309d9040751f6dc9e33c85dce6abb55a46ef7ea3644577dd014611c379447ef3" dependencies = [ "ahash", "apache-avro", @@ -1107,6 +1179,7 @@ dependencies = [ "arrow-schema", "chrono", "half", + "instant", "libc", "num_cpus", "object_store", @@ -1114,11 +1187,20 @@ dependencies = [ "sqlparser", ] +[[package]] +name = "datafusion-common-runtime" +version = "37.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e4a44d8ef1b1e85d32234e6012364c411c3787859bb3bba893b0332cb03dfd" +dependencies = [ + "tokio", +] + [[package]] name = "datafusion-execution" -version = "36.0.0" +version = "37.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9c93043081487e335399a21ebf8295626367a647ac5cb87d41d18afad7d0f7" +checksum = "06a3a29ae36bcde07d179cc33b45656a8e7e4d023623e320e48dcf1200eeee95" dependencies = [ "arrow", "chrono", @@ -1137,13 +1219,14 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "36.0.0" +version = "37.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e204d89909e678846b6a95f156aafc1ee5b36cb6c9e37ec2e1449b078a38c818" +checksum = "2a3542aa322029c2121a671ce08000d4b274171070df13f697b14169ccf4f628" dependencies = [ "ahash", "arrow", "arrow-array", + "chrono", "datafusion-common", "paste", "sqlparser", @@ -1153,38 +1236,54 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "36.0.0" +version = "37.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98f1c73f7801b2b8ba2297b3ad78ffcf6c1fc6b8171f502987eb9ad5cb244ee7" +checksum = "dd221792c666eac174ecc09e606312844772acc12cbec61a420c2fca1ee70959" dependencies = [ "arrow", - "base64", + "base64 0.22.1", + "blake2", + "blake3", + "chrono", "datafusion-common", "datafusion-execution", "datafusion-expr", + "datafusion-physical-expr", "hex", + "itertools", "log", + "md-5", + "regex", + "sha2", + "unicode-segmentation", + "uuid", ] [[package]] name = "datafusion-functions-array" -version = "36.0.0" +version = "37.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42d16a0ddf2c991526f6ffe2f47a72c6da0b7354d6c32411dd20631fe2e38937" +checksum = "e501801e84d9c6ef54caaebcda1b18a6196a24176c12fb70e969bc0572e03c55" dependencies = [ "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", "datafusion-common", "datafusion-execution", "datafusion-expr", + "datafusion-functions", + "itertools", "log", "paste", ] [[package]] name = "datafusion-optimizer" -version = "36.0.0" +version = "37.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ae27e07bf1f04d327be5c2a293470879801ab5535204dc3b16b062fda195496" +checksum = "76bd7f5087817deb961764e8c973d243b54f8572db414a8f0a8f33a48f991e0a" dependencies = [ "arrow", "async-trait", @@ -1200,9 +1299,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "36.0.0" +version = "37.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dde620cd9ef76a3bca9c754fb68854bd2349c49f55baf97e08001f9e967f6d6b" +checksum = "5cabc0d9aaa0f5eb1b472112f16223c9ffd2fb04e58cbf65c0a331ee6e993f96" dependencies = [ "ahash", "arrow", @@ -1211,7 +1310,7 @@ dependencies = [ "arrow-ord", "arrow-schema", "arrow-string", - "base64", + "base64 0.22.1", "blake2", "blake3", "chrono", @@ -1231,14 +1330,13 @@ dependencies = [ "regex", "sha2", "unicode-segmentation", - "uuid", ] [[package]] name = "datafusion-physical-plan" -version = "36.0.0" +version = "37.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a4c75fba9ea99d64b2246cbd2fcae2e6fc973e6616b1015237a616036506dd4" +checksum = "17c0523e9c8880f2492a88bbd857dde02bed1ed23f3e9211a89d3d7ec3b44af9" dependencies = [ "ahash", "arrow", @@ -1248,6 +1346,7 @@ dependencies = [ "async-trait", "chrono", "datafusion-common", + "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", "datafusion-physical-expr", @@ -1262,21 +1361,137 @@ dependencies = [ "pin-project-lite", "rand", "tokio", - "uuid", +] + +[[package]] +name = "datafusion-proto" +version = "37.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db73393e42f35e165d31399192fbf41691967d428ebed47875ad34239fbcfc16" +dependencies = [ + "arrow", + "chrono", + "datafusion", + "datafusion-common", + "datafusion-expr", + "object_store", + "prost", ] [[package]] name = "datafusion-sql" -version = "36.0.0" +version = "37.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21474a95c3a62d113599d21b439fa15091b538bac06bd20be0bb2e7d22903c09" +checksum = "49eb54b42227136f6287573f2434b1de249fe1b8e6cd6cc73a634e4a3ec29356" dependencies = [ "arrow", + "arrow-array", "arrow-schema", "datafusion-common", "datafusion-expr", "log", "sqlparser", + "strum 0.26.2", +] + +[[package]] +name = "deltalake" +version = "0.17.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7e6d7dc62f957c02d899cd6a88be74c70594f7782b24c97392267b49ed5c9a5" +dependencies = [ + "deltalake-aws", + "deltalake-core", +] + +[[package]] +name = "deltalake-aws" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7c15dda219ed925c28e2387712f9a77ed7253a80b53fce59849067bc0918eb2" +dependencies = [ + "async-trait", + "aws-config", + "aws-credential-types", + "aws-sdk-dynamodb", + "aws-sdk-sts", + "aws-smithy-runtime-api", + "backoff", + "bytes", + "deltalake-core", + "futures", + "lazy_static", + "maplit", + "object_store", + "regex", + "thiserror", + "tokio", + "tracing", + "url", + "uuid", +] + +[[package]] +name = "deltalake-core" +version = "0.17.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e8dc1bcd91be689ee7f6ce363798dc2e9b6954be9d597f884de11ba27b33add" +dependencies = [ + "arrow", + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-ipc", + "arrow-json", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "async-trait", + "bytes", + "cfg-if", + "chrono", + "dashmap", + "datafusion", + "datafusion-common", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-array", + "datafusion-physical-expr", + "datafusion-proto", + "datafusion-sql", + "either", + "errno", + "fix-hidden-lifetime-bug", + "futures", + "hashbrown 0.14.3", + "indexmap", + "itertools", + "lazy_static", + "libc", + "maplit", + "num-bigint", + "num-traits", + "num_cpus", + "object_store", + "once_cell", + "parking_lot", + "parquet", + "percent-encoding", + "pin-project-lite", + "rand", + "regex", + "roaring", + "serde", + "serde_json", + "sqlparser", + "thiserror", + "tokio", + "tracing", + "url", + "uuid", + "z85", ] [[package]] @@ -1342,6 +1557,26 @@ version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "658bd65b1cf4c852a3cc96f18a8ce7b5640f6b703f905c7d74532294c2a63984" +[[package]] +name = "fix-hidden-lifetime-bug" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4ae9c2016a663983d4e40a9ff967d6dcac59819672f0b47f2b17574e99c33c8" +dependencies = [ + "fix-hidden-lifetime-bug-proc_macros", +] + +[[package]] +name = "fix-hidden-lifetime-bug-proc_macros" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4c81935e123ab0741c4c4f0d9b8377e5fb21d3de7e062fa4b1263b1fbcba1ea" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "fixedbitset" version = "0.4.2" @@ -1738,6 +1973,18 @@ dependencies = [ "hashbrown 0.14.3", ] +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "integer-encoding" version = "3.0.4" @@ -1931,6 +2178,12 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "maplit" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" + [[package]] name = "md-5" version = "0.10.6" @@ -2081,7 +2334,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8718f8b65fdf67a45108d1548347d4af7d71fb81ce727bbf9e3b2535e079db3" dependencies = [ "async-trait", - "base64", + "base64 0.21.7", "bytes", "chrono", "futures", @@ -2156,9 +2409,9 @@ dependencies = [ [[package]] name = "parquet" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "547b92ebf0c1177e3892f44c8f79757ee62e678d564a9834189725f2c5b7a750" +checksum = "096795d4f47f65fd3ee1ec5a98b77ab26d602f2cc785b0e4be5443add17ecc32" dependencies = [ "ahash", "arrow-array", @@ -2168,7 +2421,7 @@ dependencies = [ "arrow-ipc", "arrow-schema", "arrow-select", - "base64", + "base64 0.22.1", "brotli", "bytes", "chrono", @@ -2342,6 +2595,29 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "prost" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0f5d036824e4761737860779c906171497f6d55681139d8312388f8fe398922" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-derive" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19de2de2a00075bf566bee3bd4db014b11587e84184d3f7a791bc17f1a8e9e48" +dependencies = [ + "anyhow", + "itertools", + "proc-macro2", + "quote", + "syn 2.0.60", +] + [[package]] name = "quad-rand" version = "0.2.1" @@ -2447,7 +2723,7 @@ version = "0.11.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62" dependencies = [ - "base64", + "base64 0.21.7", "bytes", "encoding_rs", "futures-core", @@ -2505,6 +2781,16 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422" +[[package]] +name = "roaring" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1c77081a55300e016cb86f2864415b7518741879db925b8d488a0ee0d2da6bf" +dependencies = [ + "bytemuck", + "byteorder", +] + [[package]] name = "rustc-demangle" version = "0.1.23" @@ -2563,7 +2849,7 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" dependencies = [ - "base64", + "base64 0.21.7", ] [[package]] @@ -2787,9 +3073,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.43.1" +version = "0.44.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f95c4bae5aba7cd30bd506f7140026ade63cff5afd778af8854026f9606bf5d4" +checksum = "aaf9c7ff146298ffda83a200f8d5084f08dcee1edfc135fcc1d646a45d50ffd6" dependencies = [ "log", "sqlparser_derive", @@ -3016,6 +3302,7 @@ dependencies = [ "libc", "mio", "num_cpus", + "parking_lot", "pin-project-lite", "signal-hook-registry", "socket2", @@ -3070,6 +3357,7 @@ version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ + "log", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -3539,6 +3827,12 @@ dependencies = [ "lzma-sys", ] +[[package]] +name = "z85" +version = "3.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a599daf1b507819c1121f0bf87fa37eb19daac6aff3aefefd4e6e2e0f2020fc" + [[package]] name = "zerocopy" version = "0.7.32" diff --git a/Cargo.toml b/Cargo.toml index edd76ef..964aa1c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,11 +20,16 @@ path = "src/lib.rs" aws-config = "1" aws-sdk-glue = "1" aws-types = "1" -datafusion = { version = "36", features = ["avro"] } +datafusion = { version = "37", features = ["avro"] } object_store = { version = "0.9", features = ["aws"] } pest = "2" pest_derive = "2" tokio = { version = "1", features = ["macros", "rt", "rt-multi-thread", "sync", "fs"] } url = "2" dashmap = "5" +deltalake = {version = "0.17", optional=true, features=["datafusion", "deltalake-aws", "s3"]} + +[features] +default = ["deltalake"] +deltalake = ["dep:deltalake"] diff --git a/examples/demo.rs b/examples/demo.rs index f36107b..3010cbf 100644 --- a/examples/demo.rs +++ b/examples/demo.rs @@ -40,18 +40,22 @@ async fn main() -> Result<()> { // Register an object store provider which creates instances for each requested s3://bucket using the sdk_config credentials // As an alternative you can also manually register the required object_store(s) - let object_store_provider = DemoS3ObjectStoreProvider::new(&sdk_config).await?; + let object_store_provider = Arc::new(DemoS3ObjectStoreProvider::new(&sdk_config).await?); let runtime_config = - RuntimeConfig::default().with_object_store_registry(Arc::new(object_store_provider)); + RuntimeConfig::default().with_object_store_registry(object_store_provider.clone()); let config = SessionConfig::new().with_information_schema(true); let runtime = Arc::new(RuntimeEnv::new(runtime_config)?); let ctx = SessionContext::new_with_config_rt(config, runtime); - let mut glue_catalog_provider = GlueCatalogProvider::new(&sdk_config); + let mut glue_catalog_provider = GlueCatalogProvider::new(sdk_config, object_store_provider); let register_results = glue_catalog_provider - .register_all_with_options(&TableRegistrationOptions::InferSchemaFromData, &ctx.state()) + .register_all_with_options( + &TableRegistrationOptions::DeriveSchemaFromGlueTable, + &ctx.state(), + ) .await?; + for result in register_results { if result.is_err() { // only output tables which were not registered... @@ -122,11 +126,11 @@ async fn sample(ctx: SessionContext, schema: &str, table: &str, limit: usize) -> pub struct DemoS3ObjectStoreProvider { credentials: Credentials, region: String, - object_stores: tokio::sync::RwLock>>, + object_stores: DashMap>, } impl DemoS3ObjectStoreProvider { - pub async fn new(sdk_config: &SdkConfig) -> crate::Result { + pub async fn new(sdk_config: &SdkConfig) -> Result { let credentials_provider = sdk_config .credentials_provider() .expect("could not find credentials provider"); @@ -139,8 +143,7 @@ impl DemoS3ObjectStoreProvider { .map(|r| r.to_string()) .unwrap_or_else(|| "eu-central-1".to_string()); - let object_stores: tokio::sync::RwLock>> = - tokio::sync::RwLock::new(DashMap::new()); + let object_stores = DashMap::new(); Ok(DemoS3ObjectStoreProvider { credentials, @@ -149,7 +152,7 @@ impl DemoS3ObjectStoreProvider { }) } - fn build_s3_object_store(&self, url: &Url) -> crate::Result> { + fn build_s3_object_store(&self, url: &Url) -> Result> { let bucket_name = get_host_name(url)?; let s3_builder = AmazonS3Builder::new() @@ -175,14 +178,20 @@ impl ObjectStoreRegistry for DemoS3ObjectStoreProvider { store: Arc, ) -> Option> { { - let guard = self.object_stores.blocking_write(); - guard.insert(url.clone(), store.clone()); + self.object_stores.insert(url.clone(), store.clone()); } Some(store.clone()) } fn get_store(&self, url: &Url) -> Result> { - self.build_s3_object_store(url) + if let Some(refx) = self.object_stores.get(url) { + let store = refx.value().clone(); + Ok(store) + } else { + let store = self.build_s3_object_store(url)?; + let _ = self.register_store(url, store.clone()); + Ok(store) + } } } diff --git a/src/catalog_provider/glue.rs b/src/catalog_provider/glue.rs index 6863581..df235df 100644 --- a/src/catalog_provider/glue.rs +++ b/src/catalog_provider/glue.rs @@ -1,8 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 -use crate::error::*; +use crate::error::{GlueError, Result}; use crate::glue_data_type_parser::*; -use aws_config::BehaviorVersion; use aws_sdk_glue::types::{Column, StorageDescriptor, Table}; use aws_sdk_glue::Client; use aws_types::SdkConfig; @@ -17,7 +16,10 @@ use datafusion::datasource::file_format::FileFormat; use datafusion::datasource::listing::{ ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl, }; +use datafusion::datasource::object_store::ObjectStoreRegistry; +use datafusion::datasource::TableProvider; use datafusion::execution::context::SessionState; +use deltalake::DeltaTableBuilder; use std::any::Any; use std::collections::HashMap; use std::sync::Arc; @@ -34,22 +36,18 @@ pub enum TableRegistrationOptions { pub struct GlueCatalogProvider { client: Client, schema_provider_by_database: HashMap>, + object_store_registry: Arc, } impl GlueCatalogProvider { - /// Convenience wrapper for creating a new `GlueCatalogProvider` using default configuration options. Only works with AWS. - pub async fn default() -> Self { - let shared_config = aws_config::load_defaults(BehaviorVersion::latest()).await; - GlueCatalogProvider::new(&shared_config) - } - /// Create a new Glue CatalogProvider - pub fn new(sdk_config: &SdkConfig) -> Self { - let client = Client::new(sdk_config); + pub fn new(sdk_config: SdkConfig, object_store_registry: Arc) -> Self { + let client = Client::new(&sdk_config); let schema_provider_by_database = HashMap::new(); GlueCatalogProvider { client, schema_provider_by_database, + object_store_registry, } } @@ -185,8 +183,74 @@ impl GlueCatalogProvider { let sd = Self::get_storage_descriptor(glue_table)?; let storage_location_uri = Self::get_storage_location(&sd)?; - let listing_options = - Self::get_listing_options(database_name, table_name, &sd, glue_table)?; + let table_parameters = match &glue_table.parameters { + Some(x) => x.clone(), + None => HashMap::new(), + }; + + let table_type = table_parameters + .get("table_type") + .map(|x| x.to_lowercase()) + .unwrap_or("".to_string()); + if table_type == "delta" { + self.register_delta_table(database_name, table_name, storage_location_uri) + .await?; + } else { + self.register_listing_table( + glue_table, + table_registration_options, + ctx, + database_name, + table_name, + &sd, + storage_location_uri, + ) + .await?; + } + + Ok(()) + } + + async fn register_delta_table( + &mut self, + database_name: &str, + table_name: &String, + storage_location_uri: &str, + ) -> Result<()> { + let url = url::Url::parse(storage_location_uri).map_err(|_| { + GlueError::Other(format!("Failed to parse {storage_location_uri} as url")) + })?; + let object_store = self.object_store_registry.get_store(&url)?; + + deltalake::aws::register_handlers(None); + + let builder = DeltaTableBuilder::from_uri(storage_location_uri); + + let delta_table = builder + .with_storage_backend(object_store, url) + .load() + .await + .map(|t| Arc::new(t) as Arc) + .map_err(GlueError::Deltalake)?; + + let schema_provider_for_database = self.ensure_schema_provider_for_database(database_name); + schema_provider_for_database.register_table(table_name.to_string(), delta_table)?; + + Ok(()) + } + + #[allow(clippy::too_many_arguments)] + async fn register_listing_table( + &mut self, + glue_table: &Table, + table_registration_options: &TableRegistrationOptions, + ctx: &SessionState, + database_name: &str, + table_name: &String, + sd: &StorageDescriptor, + storage_location_uri: &str, + ) -> Result<()> { + let listing_options = Self::get_listing_options(database_name, table_name, sd, glue_table)?; let schema_provider_for_database = self.ensure_schema_provider_for_database(database_name); @@ -196,7 +260,7 @@ impl GlueCatalogProvider { let ltc_with_lo_and_schema = match table_registration_options { TableRegistrationOptions::DeriveSchemaFromGlueTable => { - let schema = Self::derive_schema(database_name, table_name, &sd)?; + let schema = Self::derive_schema(database_name, table_name, sd)?; ltc_with_lo.with_schema(SchemaRef::new(schema)) } TableRegistrationOptions::InferSchemaFromData => ltc_with_lo.infer_schema(ctx).await?, @@ -206,7 +270,6 @@ impl GlueCatalogProvider { schema_provider_for_database .register_table(table_name.to_string(), Arc::new(listing_table))?; - Ok(()) } @@ -385,7 +448,6 @@ impl GlueCatalogProvider { collect_stat: true, target_partitions: 1, file_sort_order: vec![], - file_type_write_options: None, }; Ok(listing_options) diff --git a/src/error.rs b/src/error.rs index 0703226..c47fb08 100644 --- a/src/error.rs +++ b/src/error.rs @@ -18,6 +18,10 @@ pub enum GlueError { DataFusion(DataFusionError), /// Error during mapping of GlueDataType GlueDataTypeMapping(String), + /// Error during loading of delta lake table + Deltalake(deltalake::errors::DeltaTableError), + /// Other + Other(String), } impl Display for GlueError { @@ -29,6 +33,8 @@ impl Display for GlueError { GlueError::GlueDataTypeMapping(desc) => { write!(f, "Could not map glue data type: {}", desc) } + GlueError::Deltalake(e) => e.fmt(f), + GlueError::Other(msg) => write!(f, "Other: {}", msg), } } }