From 1fe8ebeb01587229d9a78679617942beaa263452 Mon Sep 17 00:00:00 2001 From: Leonid Ryzhyk Date: Sun, 17 Nov 2024 21:59:47 -0800 Subject: [PATCH] adapters: Iceberg source connector. Initial implementation of the Iceberg source connector. The connector is built on the `iceberg` crate, which is still in its early days and has many limitations and performance issues. * It currently only supports primitive types (no structs, maps, lists) * It only supports reading tables (hence no sink connector yet) * It only supports snapshot reads, not table following, although I think the latter could be mostly implemented using available low-level APIs. * I haven't figured out how to do efficient range queries for time series data: https://github.com/apache/iceberg-rust/issues/811 The implementation has a very similar structure to the Delta Lake connector and actually shares a bunch of code with it (I moved some of this code to `adapterslib`, but I copied some other code, which I thought may diverge in the future). Both connectors register the table as a datafusion table provider and mostly work with it via the datafusion API. The main difference between Iceberg and Delta is that Iceberg cannot really be used without a catalog, since the catalog is responsible for tracking the location of the latest metadata file (the metadata file is the root object required to do anything with the Iceberg table). We currently support two of the most common catalog APIs: Glue (for Iceberg tables in AWS), and REST, which seems to be increasingly popular in the Iceberg community. We should be able to easily add SQL and Hive catalogs, which are supported by the `iceberg` crate. The connector should work with tables in S3, local FS, and GCS, but only the first two have been tested. The `iceberg` crate currently doesn't support Azure and other data stores, although it should be easy to add them if necessary, since they are supported by the `opendal` crate, which `iceberg` uses for FileIO. 
Signed-off-by: Leonid Ryzhyk --- .github/workflows/ci.yml | 8 +- Cargo.lock | 1964 +++++++++++------ Cargo.toml | 1 + Earthfile | 6 +- crates/adapters/Cargo.toml | 9 +- .../src/integrated/delta_table/input.rs | 2 +- crates/adapters/src/integrated/mod.rs | 12 +- crates/adapters/src/test/data.rs | 212 +- crates/adapters/src/test/iceberg.rs | 380 ++++ crates/adapters/src/test/mod.rs | 13 +- crates/adapters/src/transport/mod.rs | 3 +- crates/feldera-types/src/config.rs | 4 + crates/feldera-types/src/program_schema.rs | 26 + .../src/transport/delta_table.rs | 12 +- crates/feldera-types/src/transport/iceberg.rs | 356 +++ crates/feldera-types/src/transport/mod.rs | 1 + crates/iceberg/Cargo.toml | 20 + crates/iceberg/src/input.rs | 860 ++++++++ crates/iceberg/src/lib.rs | 22 + crates/iceberg/src/test/README.md | 51 + .../iceberg/src/test/create_test_table_s3.py | 229 ++ crates/iceberg/src/test/requirements.ci.txt | 5 + crates/iceberg/src/test/requirements.txt | 4 + crates/pipeline-manager/Cargo.toml | 2 +- crates/pipeline-manager/src/api/mod.rs | 6 +- .../pipeline-manager/src/db/types/program.rs | 1 + docs/connectors/sources/delta.md | 9 +- docs/connectors/sources/iceberg.md | 318 +++ docs/connectors/sources/s3.md | 5 +- docs/sidebars.js | 5 + openapi.json | 236 +- 31 files changed, 4051 insertions(+), 731 deletions(-) create mode 100644 crates/adapters/src/test/iceberg.rs create mode 100644 crates/feldera-types/src/transport/iceberg.rs create mode 100644 crates/iceberg/Cargo.toml create mode 100644 crates/iceberg/src/input.rs create mode 100644 crates/iceberg/src/lib.rs create mode 100644 crates/iceberg/src/test/README.md create mode 100644 crates/iceberg/src/test/create_test_table_s3.py create mode 100644 crates/iceberg/src/test/requirements.ci.txt create mode 100644 crates/iceberg/src/test/requirements.txt create mode 100644 docs/connectors/sources/iceberg.md diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d609f97626..1cfae592ee 100644 --- 
a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -92,11 +92,13 @@ jobs: s3_access_key: ${{ secrets.ci_s3_aws_access_key }} s3_secret: ${{ secrets.ci_s3_aws_secret }} - # Ship secrets for the AWS CI account for the delta table output transport test to Earthly. - - name: Delta output S3 secrets + # Ship secrets for the AWS CI account for Deltalake and Iceberg adapters tests to Earthly. + - name: Delta/Iceberg S3 secrets run: | echo DELTA_TABLE_TEST_AWS_ACCESS_KEY_ID="${delta_table_test_aws_access_key_id}" >> .arg && \ - echo DELTA_TABLE_TEST_AWS_SECRET_ACCESS_KEY="${delta_table_test_aws_secret_access_key}" >> .arg + echo DELTA_TABLE_TEST_AWS_SECRET_ACCESS_KEY="${delta_table_test_aws_secret_access_key}" >> .arg && \ + echo ICEBERG_TEST_AWS_ACCESS_KEY_ID="${delta_table_test_aws_access_key_id}" >> .arg && \ + echo ICEBERG_TEST_AWS_SECRET_ACCESS_KEY="${delta_table_test_aws_secret_access_key}" >> .arg env: delta_table_test_aws_access_key_id: ${{ secrets.delta_table_test_aws_access_key_id }} delta_table_test_aws_secret_access_key: ${{ secrets.delta_table_test_aws_secret_access_key }} diff --git a/Cargo.lock b/Cargo.lock index bc1fcf8d06..dc37ef0d12 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -244,14 +244,14 @@ dependencies = [ "actix-utils", "futures-core", "http 0.2.12", - "http 1.1.0", + "http 1.2.0", "impl-more", "openssl", "pin-project-lite", "rustls-pki-types", "tokio", "tokio-openssl", - "tokio-rustls 0.26.0", + "tokio-rustls 0.26.1", "tokio-util", "tracing", "webpki-roots", @@ -448,9 +448,9 @@ dependencies = [ [[package]] name = "allocator-api2" -version = "0.2.18" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" [[package]] name = "android-tzdata" @@ -494,9 +494,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.15" +version = "0.6.18" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" dependencies = [ "anstyle", "anstyle-parse", @@ -509,43 +509,43 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.8" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" [[package]] name = "anstyle-parse" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.1.1" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "anstyle-wincon" -version = "3.0.4" +version = "3.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" +checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" dependencies = [ "anstyle", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "anyhow" -version = "1.0.91" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c042108f3ed77fd83760a5fd79b53be043192bb3b9dba91d8c574c0ada7850c8" +checksum = "c1fd03a028ef38ba2276dce7e33fcd6369c158a1bca17946c4b1b701891c1ff7" dependencies = [ "backtrace", ] @@ -569,8 +569,8 @@ 
dependencies = [ "serde_json", "strum", "strum_macros", - "thiserror 1.0.64", - "typed-builder", + "thiserror 1.0.69", + "typed-builder 0.19.1", "uuid", ] @@ -585,9 +585,9 @@ dependencies = [ [[package]] name = "arbitrary" -version = "1.3.2" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110" +checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223" dependencies = [ "derive_arbitrary", ] @@ -606,7 +606,7 @@ dependencies = [ "bzip2", "flate2", "tar", - "thiserror 1.0.64", + "thiserror 1.0.69", "xz2", "zip 0.6.6", ] @@ -785,7 +785,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.6.0", + "indexmap 2.7.0", "lexical-core", "num", "serde", @@ -863,9 +863,9 @@ dependencies = [ [[package]] name = "ascii_table" -version = "4.0.4" +version = "4.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed8a80a95ab122e7cc43bfde1d51949c89ff67e0c76eb795dc045003418473e2" +checksum = "adb7e515f68a8667f957e85c7d59639fe1fa2e0b06c3c2394869b73a404e52ed" [[package]] name = "assert-json-diff" @@ -912,9 +912,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.15" +version = "0.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e26a9844c659a2a293d239c7910b752f8487fe122c6c8bd1659bf85a6507c302" +checksum = "df895a515f70646414f4b45c0b79082783b80552b373a68283012928df56f522" dependencies = [ "bzip2", "flate2", @@ -936,7 +936,7 @@ checksum = "30ca9a001c1e8ba5149f91a74362376cc6bc5b919d92d988668657bd570bdcec" dependencies = [ "async-task", "concurrent-queue", - "fastrand 2.1.1", + "fastrand 2.3.0", "futures-lite", "slab", ] @@ -958,9 +958,9 @@ dependencies = [ [[package]] name = "async-io" -version = "2.3.4" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"444b0228950ee6501b3568d3c93bf1176a1fdbc3b758dcd9475046d30f4dc7e8" +checksum = "43a2b323ccce0a1d90b449fd71f2a06ca7faa7c54c2751f06c9bd851fc061059" dependencies = [ "async-lock", "cfg-if", @@ -1136,7 +1136,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "rand", - "rustls 0.23.14", + "rustls 0.23.20", "serde", "serde_json", "serde_urlencoded", @@ -1164,7 +1164,7 @@ dependencies = [ "fastrand 1.9.0", "hex", "http 0.2.12", - "hyper 0.14.31", + "hyper 0.14.32", "ring 0.16.20", "time", "tokio", @@ -1175,24 +1175,24 @@ dependencies = [ [[package]] name = "aws-config" -version = "1.5.8" +version = "1.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7198e6f03240fdceba36656d8be440297b6b82270325908c7381f37d826a74f6" +checksum = "9b49afaa341e8dd8577e1a2200468f98956d6eda50bcf4a53246cc00174ba924" dependencies = [ "aws-credential-types 1.2.1", "aws-runtime", - "aws-sdk-sso 1.46.0", + "aws-sdk-sso 1.50.0", "aws-sdk-ssooidc", - "aws-sdk-sts 1.46.0", - "aws-smithy-async 1.2.1", + "aws-sdk-sts 1.51.0", + "aws-smithy-async 1.2.2", "aws-smithy-http 0.60.11", "aws-smithy-json 0.60.7", "aws-smithy-runtime", "aws-smithy-runtime-api", - "aws-smithy-types 1.2.7", + "aws-smithy-types 1.2.10", "aws-types 1.3.3", "bytes", - "fastrand 2.1.1", + "fastrand 2.3.0", "hex", "http 0.2.12", "ring 0.17.8", @@ -1223,9 +1223,9 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60e8f6b615cb5fc60a98132268508ad104310f0cfb25a1c22eee76efdf9154da" dependencies = [ - "aws-smithy-async 1.2.1", + "aws-smithy-async 1.2.2", "aws-smithy-runtime-api", - "aws-smithy-types 1.2.7", + "aws-smithy-types 1.2.10", "zeroize", ] @@ -1264,21 +1264,20 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.10.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdd82dba44d209fddb11c190e0a94b78651f95299598e472215667417a03ff1d" +checksum = 
"f47bb8cc16b669d267eeccf585aea077d0882f4777b1c1f740217885d6e6e5a3" dependencies = [ "aws-lc-sys", - "mirai-annotations", "paste", "zeroize", ] [[package]] name = "aws-lc-sys" -version = "0.22.0" +version = "0.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df7a4168111d7eb622a31b214057b8509c0a7e1794f44c546d742330dc793972" +checksum = "a2101df3813227bbaaaa0b04cd61c534c7954b22bd68d399b440be937dc63ff7" dependencies = [ "bindgen", "cc", @@ -1291,21 +1290,21 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.4.3" +version = "1.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a10d5c055aa540164d9561a0e2e74ad30f0dcf7393c3a92f6733ddf9c5762468" +checksum = "b5ac934720fbb46206292d2c75b57e67acfc56fe7dfd34fb9a02334af08409ea" dependencies = [ "aws-credential-types 1.2.1", - "aws-sigv4 1.2.4", - "aws-smithy-async 1.2.1", + "aws-sigv4 1.2.6", + "aws-smithy-async 1.2.2", "aws-smithy-eventstream", "aws-smithy-http 0.60.11", "aws-smithy-runtime", "aws-smithy-runtime-api", - "aws-smithy-types 1.2.7", + "aws-smithy-types 1.2.10", "aws-types 1.3.3", "bytes", - "fastrand 2.1.1", + "fastrand 2.3.0", "http 0.2.12", "http-body 0.4.6", "once_cell", @@ -1342,21 +1341,43 @@ dependencies = [ [[package]] name = "aws-sdk-dynamodb" -version = "1.50.0" +version = "1.55.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f38c3122dd27386bf38745f67c9f2c2c47479157bc8a697a3fd97ff45e78dd34" +checksum = "a18e18b3cf6b75c1fcb15e677f6dbd2a6d8dfe4d168e0a36721f7a6167c6c829" dependencies = [ "aws-credential-types 1.2.1", "aws-runtime", - "aws-smithy-async 1.2.1", + "aws-smithy-async 1.2.2", "aws-smithy-http 0.60.11", - "aws-smithy-json 0.60.7", + "aws-smithy-json 0.61.1", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types 1.2.10", + "aws-types 1.3.3", + "bytes", + "fastrand 2.3.0", + "http 0.2.12", + "once_cell", + "regex-lite", + "tracing", +] + +[[package]] +name = 
"aws-sdk-glue" +version = "1.72.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "266cad033e81e7297a96b75f686f2f22c3e5a989619b2b3dc7dbbf2af3aeab22" +dependencies = [ + "aws-credential-types 1.2.1", + "aws-runtime", + "aws-smithy-async 1.2.2", + "aws-smithy-http 0.60.11", + "aws-smithy-json 0.61.1", "aws-smithy-runtime", "aws-smithy-runtime-api", - "aws-smithy-types 1.2.7", + "aws-smithy-types 1.2.10", "aws-types 1.3.3", "bytes", - "fastrand 2.1.1", "http 0.2.12", "once_cell", "regex-lite", @@ -1365,26 +1386,25 @@ dependencies = [ [[package]] name = "aws-sdk-s3" -version = "1.56.0" +version = "1.65.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cecd672c8d4265fd4fbecacd4a479180e616881bbe639250cf81ddb604e4c301" +checksum = "d3ba2c5c0f2618937ce3d4a5ad574b86775576fa24006bcb3128c6e2cbf3c34e" dependencies = [ - "ahash 0.8.11", "aws-credential-types 1.2.1", "aws-runtime", - "aws-sigv4 1.2.4", - "aws-smithy-async 1.2.1", + "aws-sigv4 1.2.6", + "aws-smithy-async 1.2.2", "aws-smithy-checksums", "aws-smithy-eventstream", "aws-smithy-http 0.60.11", - "aws-smithy-json 0.60.7", + "aws-smithy-json 0.61.1", "aws-smithy-runtime", "aws-smithy-runtime-api", - "aws-smithy-types 1.2.7", + "aws-smithy-types 1.2.10", "aws-smithy-xml 0.60.9", "aws-types 1.3.3", "bytes", - "fastrand 2.1.1", + "fastrand 2.3.0", "hex", "hmac", "http 0.2.12", @@ -1425,18 +1445,18 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.46.0" +version = "1.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dc2faec3205d496c7e57eff685dd944203df7ce16a4116d0281c44021788a7b" +checksum = "05ca43a4ef210894f93096039ef1d6fa4ad3edfabb3be92b80908b9f2e4b4eab" dependencies = [ "aws-credential-types 1.2.1", "aws-runtime", - "aws-smithy-async 1.2.1", + "aws-smithy-async 1.2.2", "aws-smithy-http 0.60.11", - "aws-smithy-json 0.60.7", + "aws-smithy-json 0.61.1", "aws-smithy-runtime", "aws-smithy-runtime-api", - 
"aws-smithy-types 1.2.7", + "aws-smithy-types 1.2.10", "aws-types 1.3.3", "bytes", "http 0.2.12", @@ -1447,18 +1467,18 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.47.0" +version = "1.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c93c241f52bc5e0476e259c953234dab7e2a35ee207ee202e86c0095ec4951dc" +checksum = "abaf490c2e48eed0bb8e2da2fb08405647bd7f253996e0f93b981958ea0f73b0" dependencies = [ "aws-credential-types 1.2.1", "aws-runtime", - "aws-smithy-async 1.2.1", + "aws-smithy-async 1.2.2", "aws-smithy-http 0.60.11", - "aws-smithy-json 0.60.7", + "aws-smithy-json 0.61.1", "aws-smithy-runtime", "aws-smithy-runtime-api", - "aws-smithy-types 1.2.7", + "aws-smithy-types 1.2.10", "aws-types 1.3.3", "bytes", "http 0.2.12", @@ -1495,19 +1515,19 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.46.0" +version = "1.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b259429be94a3459fa1b00c5684faee118d74f9577cc50aebadc36e507c63b5f" +checksum = "b68fde0d69c8bfdc1060ea7da21df3e39f6014da316783336deff0a9ec28f4bf" dependencies = [ "aws-credential-types 1.2.1", "aws-runtime", - "aws-smithy-async 1.2.1", + "aws-smithy-async 1.2.2", "aws-smithy-http 0.60.11", - "aws-smithy-json 0.60.7", + "aws-smithy-json 0.61.1", "aws-smithy-query 0.60.7", "aws-smithy-runtime", "aws-smithy-runtime-api", - "aws-smithy-types 1.2.7", + "aws-smithy-types 1.2.10", "aws-smithy-xml 0.60.9", "aws-types 1.3.3", "http 0.2.12", @@ -1551,22 +1571,22 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.2.4" +version = "1.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc8db6904450bafe7473c6ca9123f88cc11089e41a025408f992db4e22d3be68" +checksum = "7d3820e0c08d0737872ff3c7c1f21ebbb6693d832312d6152bf18ef50a5471c2" dependencies = [ "aws-credential-types 1.2.1", "aws-smithy-eventstream", "aws-smithy-http 0.60.11", "aws-smithy-runtime-api", - "aws-smithy-types 
1.2.7", + "aws-smithy-types 1.2.10", "bytes", "crypto-bigint 0.5.5", "form_urlencoded", "hex", "hmac", "http 0.2.12", - "http 1.1.0", + "http 1.2.0", "once_cell", "p256", "percent-encoding", @@ -1592,9 +1612,9 @@ dependencies = [ [[package]] name = "aws-smithy-async" -version = "1.2.1" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62220bc6e97f946ddd51b5f1361f78996e704677afc518a4ff66b7a72ea1378c" +checksum = "8aa8ff1492fd9fb99ae28e8467af0dbbb7c31512b16fabf1a0f10d7bb6ef78bb" dependencies = [ "futures-util", "pin-project-lite", @@ -1603,12 +1623,12 @@ dependencies = [ [[package]] name = "aws-smithy-checksums" -version = "0.60.12" +version = "0.60.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "598b1689d001c4d4dc3cb386adb07d37786783aee3ac4b324bcadac116bf3d23" +checksum = "ba1a71073fca26775c8b5189175ea8863afb1c9ea2cceb02a5de5ad9dfbaa795" dependencies = [ "aws-smithy-http 0.60.11", - "aws-smithy-types 1.2.7", + "aws-smithy-types 1.2.10", "bytes", "crc32c", "crc32fast", @@ -1636,7 +1656,7 @@ dependencies = [ "fastrand 1.9.0", "http 0.2.12", "http-body 0.4.6", - "hyper 0.14.31", + "hyper 0.14.32", "hyper-rustls 0.23.2", "lazy_static", "pin-project-lite", @@ -1652,7 +1672,7 @@ version = "0.60.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cef7d0a272725f87e51ba2bf89f8c21e4df61b9e49ae1ac367a6d69916ef7c90" dependencies = [ - "aws-smithy-types 1.2.7", + "aws-smithy-types 1.2.10", "bytes", "crc32fast", ] @@ -1669,7 +1689,7 @@ dependencies = [ "futures-core", "http 0.2.12", "http-body 0.4.6", - "hyper 0.14.31", + "hyper 0.14.32", "once_cell", "percent-encoding", "pin-project-lite", @@ -1687,7 +1707,7 @@ checksum = "5c8bc3e8fdc6b8d07d976e301c02fe553f72a39b7a9fea820e023268467d7ab6" dependencies = [ "aws-smithy-eventstream", "aws-smithy-runtime-api", - "aws-smithy-types 1.2.7", + "aws-smithy-types 1.2.10", "bytes", "bytes-utils", "futures-core", @@ -1731,7 +1751,16 
@@ version = "0.60.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4683df9469ef09468dad3473d129960119a0d3593617542b7d52086c8486f2d6" dependencies = [ - "aws-smithy-types 1.2.7", + "aws-smithy-types 1.2.10", +] + +[[package]] +name = "aws-smithy-json" +version = "0.61.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee4e69cc50921eb913c6b662f8d909131bb3e6ad6cb6090d3a39b66fc5c52095" +dependencies = [ + "aws-smithy-types 1.2.10", ] [[package]] @@ -1750,28 +1779,28 @@ version = "0.60.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2fbd61ceb3fe8a1cb7352e42689cec5335833cd9f94103a61e98f9bb61c64bb" dependencies = [ - "aws-smithy-types 1.2.7", + "aws-smithy-types 1.2.10", "urlencoding", ] [[package]] name = "aws-smithy-runtime" -version = "1.7.2" +version = "1.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a065c0fe6fdbdf9f11817eb68582b2ab4aff9e9c39e986ae48f7ec576c6322db" +checksum = "431a10d0e07e09091284ef04453dae4069283aa108d209974d67e77ae1caa658" dependencies = [ - "aws-smithy-async 1.2.1", + "aws-smithy-async 1.2.2", "aws-smithy-http 0.60.11", "aws-smithy-runtime-api", - "aws-smithy-types 1.2.7", + "aws-smithy-types 1.2.10", "bytes", - "fastrand 2.1.1", + "fastrand 2.3.0", "h2 0.3.26", "http 0.2.12", "http-body 0.4.6", "http-body 1.0.1", "httparse", - "hyper 0.14.31", + "hyper 0.14.32", "hyper-rustls 0.24.2", "once_cell", "pin-project-lite", @@ -1783,15 +1812,15 @@ dependencies = [ [[package]] name = "aws-smithy-runtime-api" -version = "1.7.2" +version = "1.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e086682a53d3aa241192aa110fa8dfce98f2f5ac2ead0de84d41582c7e8fdb96" +checksum = "92165296a47a812b267b4f41032ff8069ab7ff783696d217f0994a0d7ab585cd" dependencies = [ - "aws-smithy-async 1.2.1", - "aws-smithy-types 1.2.7", + "aws-smithy-async 1.2.2", + "aws-smithy-types 1.2.10", "bytes", "http 0.2.12", - "http 
1.1.0", + "http 1.2.0", "pin-project-lite", "tokio", "tracing", @@ -1813,16 +1842,16 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.2.7" +version = "1.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "147100a7bea70fa20ef224a6bad700358305f5dc0f84649c53769761395b355b" +checksum = "8ecbf4d5dfb169812e2b240a4350f15ad3c6b03a54074e5712818801615f2dc5" dependencies = [ "base64-simd", "bytes", "bytes-utils", "futures-core", "http 0.2.12", - "http 1.1.0", + "http 1.2.0", "http-body 0.4.6", "http-body 1.0.1", "http-body-util", @@ -1878,24 +1907,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5221b91b3e441e6675310829fd8984801b772cb1546ef6c0e54dec9f1ac13fef" dependencies = [ "aws-credential-types 1.2.1", - "aws-smithy-async 1.2.1", + "aws-smithy-async 1.2.2", "aws-smithy-runtime-api", - "aws-smithy-types 1.2.7", + "aws-smithy-types 1.2.10", "rustc_version", "tracing", ] [[package]] name = "axum" -version = "0.7.7" +version = "0.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "504e3947307ac8326a5437504c517c4b56716c9d98fac0028c2acc7ca47d70ae" +checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" dependencies = [ "async-trait", "axum-core", "bytes", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", "http-body-util", "itoa", @@ -1906,8 +1935,8 @@ dependencies = [ "pin-project-lite", "rustversion", "serde", - "sync_wrapper 1.0.1", - "tower 0.5.1", + "sync_wrapper 1.0.2", + "tower 0.5.2", "tower-layer", "tower-service", ] @@ -1921,24 +1950,25 @@ dependencies = [ "async-trait", "bytes", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", "http-body-util", "mime", "pin-project-lite", "rustversion", - "sync_wrapper 1.0.1", + "sync_wrapper 1.0.2", "tower-layer", "tower-service", ] [[package]] name = "backon" -version = "1.2.0" +version = "1.3.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4fa97bb310c33c811334143cf64c5bb2b7b3c06e453db6b095d7061eff8f113" +checksum = "ba5289ec98f68f28dd809fd601059e6aa908bb8f6108620930828283d4ee23d7" dependencies = [ - "fastrand 2.1.1", + "fastrand 2.3.0", + "gloo-timers", "tokio", ] @@ -1999,9 +2029,9 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" [[package]] name = "bigdecimal" -version = "0.4.5" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d712318a27c7150326677b321a5fa91b55f6d9034ffd67f20319e147d40cee" +checksum = "7f31f3af01c5c65a07985c804d3366560e6fa7883d640a122819b14ec327482c" dependencies = [ "autocfg", "libm", @@ -2011,6 +2041,12 @@ dependencies = [ "serde", ] +[[package]] +name = "bimap" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "230c5f1ca6a325a32553f8640d31ac9b49f2411e901e427570154868b46da4f7" + [[package]] name = "bincode" version = "1.3.3" @@ -2069,18 +2105,18 @@ dependencies = [ [[package]] name = "bit-set" -version = "0.5.3" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" dependencies = [ "bit-vec", ] [[package]] name = "bit-vec" -version = "0.6.3" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" [[package]] name = "bitflags" @@ -2117,9 +2153,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.5.4" +version = "1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d82033247fd8e890df8f740e407ad4d038debb9eb1f40533fffb32e7d17dc6f7" +checksum = 
"b8ee0c1824c4dea5b5f81736aff91bae041d2c07ee1192bec91054e10e3e601e" dependencies = [ "arrayref", "arrayvec 0.7.6", @@ -2152,9 +2188,9 @@ dependencies = [ [[package]] name = "borsh" -version = "1.5.1" +version = "1.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6362ed55def622cddc70a4746a68554d7b687713770de539e59a739b249f8ed" +checksum = "2506947f73ad44e344215ccd6403ac2ae18cd8e046e581a441bf8d199f257f03" dependencies = [ "borsh-derive", "cfg_aliases 0.2.1", @@ -2162,16 +2198,15 @@ dependencies = [ [[package]] name = "borsh-derive" -version = "1.5.1" +version = "1.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3ef8005764f53cd4dca619f5bf64cafd4664dada50ece25e4d81de54c80cc0b" +checksum = "c2593a3b8b938bd68373196c9832f516be11fa487ef4ae745eb282e6a56a7244" dependencies = [ "once_cell", - "proc-macro-crate 3.2.0", + "proc-macro-crate", "proc-macro2", "quote", "syn 2.0.90", - "syn_derive", ] [[package]] @@ -2255,9 +2290,9 @@ checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce" [[package]] name = "bytemuck" -version = "1.19.0" +version = "1.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8334215b81e418a0a7bdb8ef0849474f40bb10c8b71f1c4ed315cff49f32494d" +checksum = "8b37c88a63ffd85d15b406896cc343916d7cf57838a847b3a6f2ca5d39a5695a" dependencies = [ "bytemuck_derive", ] @@ -2281,9 +2316,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.7.2" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "428d9aa8fbc0670b7b8d6030a7fadd0f86151cae55e4dbbece15f3780a3dfaf3" +checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b" [[package]] name = "bytes-utils" @@ -2297,9 +2332,9 @@ dependencies = [ [[package]] name = "bytestring" -version = "1.3.1" +version = "1.4.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "74d80203ea6b29df88012294f62733de21cfeab47f17b41af3a38bc30a03ee72" +checksum = "e465647ae23b2823b0753f50decb2d5a86d2bb2cac04788fafd1f80e45378e5f" dependencies = [ "bytes", ] @@ -2346,7 +2381,7 @@ dependencies = [ "instant", "lazy_static", "once_cell", - "thiserror 1.0.64", + "thiserror 1.0.69", "tokio", ] @@ -2365,7 +2400,7 @@ dependencies = [ "instant", "lazy_static", "once_cell", - "thiserror 1.0.64", + "thiserror 1.0.69", "tokio", ] @@ -2408,9 +2443,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.1.30" +version = "1.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b16803a61b81d9eabb7eae2588776c4c1e584b738ede45fdbb4c972cec1e9945" +checksum = "9157bbaa6b165880c27a4293a474c91cdcf265cc68cc829bf10be0964a391caf" dependencies = [ "jobserver", "libc", @@ -2456,9 +2491,9 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.38" +version = "0.4.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825" dependencies = [ "android-tzdata", "iana-time-zone", @@ -2568,9 +2603,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.20" +version = "4.5.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b97f376d85a664d5837dbae44bf546e6477a679ff6610010f17276f686d867e8" +checksum = "3135e7ec2ef7b10c6ed8950f0f792ed96ee093fa088608f1c76e569722700c84" dependencies = [ "clap_builder", "clap_derive 4.5.18", @@ -2578,25 +2613,25 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.20" +version = "4.5.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19bc80abd44e4bed93ca373a0704ccbd1b710dc5749406201bb018272808dc54" +checksum = 
"30582fc632330df2bd26877bde0c1f4470d57c582bbc070376afcd04d8cb4838" dependencies = [ "anstream", "anstyle", - "clap_lex 0.7.2", + "clap_lex 0.7.4", "strsim 0.11.1", "terminal_size", ] [[package]] name = "clap_complete" -version = "4.5.33" +version = "4.5.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9646e2e245bf62f45d39a0f3f36f1171ad1ea0d6967fd114bca72cb02a8fcdfb" +checksum = "fd4db298d517d5fa00b2b84bbe044efd3fde43874a41db0d46f91994646a2da4" dependencies = [ - "clap 4.5.20", - "clap_lex 0.7.2", + "clap 4.5.23", + "clap_lex 0.7.4", "is_executable", "shlex", ] @@ -2637,9 +2672,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.2" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" +checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" [[package]] name = "clipboard-win" @@ -2652,38 +2687,38 @@ dependencies = [ [[package]] name = "cmake" -version = "0.1.51" +version = "0.1.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb1e43aa7fd152b1f968787f7dbcdeb306d1867ff373c69955211876c053f91a" +checksum = "c682c223677e0e5b6b7f63a64b9351844c3f1b1678a68b7ee617e30fb082620e" dependencies = [ "cc", ] [[package]] name = "colorchoice" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" [[package]] name = "colored" -version = "2.1.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbf2150cce219b664a8a70df7a1f933836724b503f8a413af9365b4dcc4d90b8" +checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" dependencies = [ "lazy_static", - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] 
name = "comfy-table" -version = "7.1.1" +version = "7.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7" +checksum = "24f165e7b643266ea80cb858aed492ad9280e3e05ce24d4a99d7d7b889b6a4d9" dependencies = [ "strum", "strum_macros", - "unicode-width", + "unicode-width 0.2.0", ] [[package]] @@ -2697,15 +2732,15 @@ dependencies = [ [[package]] name = "console" -version = "0.15.8" +version = "0.15.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" +checksum = "ea3c6ecd8059b57859df5c69830340ed3c41d30e3da0c1cbed90a96ac853041b" dependencies = [ "encode_unicode", - "lazy_static", "libc", - "unicode-width", - "windows-sys 0.52.0", + "once_cell", + "unicode-width 0.2.0", + "windows-sys 0.59.0", ] [[package]] @@ -2773,6 +2808,16 @@ dependencies = [ "libc", ] +[[package]] +name = "core-foundation" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b55271e5c8c478ad3f38ad24ef34923091e0548492a266d19b3c0b4d82574c63" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -2799,9 +2844,9 @@ dependencies = [ [[package]] name = "cpufeatures" -version = "0.2.14" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" +checksum = "16b80225097f2e5ae4e7179dd2266824648f3e2f49d9134d584b76389d31c4c3" dependencies = [ "libc", ] @@ -2833,7 +2878,7 @@ dependencies = [ "anes", "cast", "ciborium", - "clap 4.5.20", + "clap 4.5.23", "criterion-plot", "is-terminal", "itertools 0.10.5", @@ -2862,9 +2907,9 @@ dependencies = [ [[package]] name = "critical-section" -version = "1.1.3" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f64009896348fc5af4222e9cf7d7d82a95a256c634ebcf61c53e4ea461422242" +checksum = "790eea4361631c5e7d22598ecd5723ff611904e3344ce8720784c93e3d83d40b" [[package]] name = "crossbeam" @@ -2881,18 +2926,18 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.13" +version = "0.5.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471" dependencies = [ "crossbeam-utils", ] [[package]] name = "crossbeam-deque" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" dependencies = [ "crossbeam-epoch", "crossbeam-utils", @@ -2909,18 +2954,18 @@ dependencies = [ [[package]] name = "crossbeam-queue" -version = "0.3.11" +version = "0.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df0346b5d5e76ac2fe4e327c5fd1118d6be7c51dfb18f9b7922923f287471e35" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" dependencies = [ "crossbeam-utils", ] [[package]] name = "crossbeam-utils" -version = "0.8.20" +version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "crossterm" @@ -2987,9 +3032,9 @@ dependencies = [ [[package]] name = "csv" -version = "1.3.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" +checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" dependencies = [ "csv-core", "itoa", @@ -3149,9 
+3194,9 @@ dependencies = [ [[package]] name = "dary_heap" -version = "0.3.6" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7762d17f1241643615821a8455a0b2c3e803784b058693d990b11f2dce25a0ca" +checksum = "04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728" [[package]] name = "dashmap" @@ -3217,7 +3262,7 @@ dependencies = [ "glob", "half", "hashbrown 0.14.5", - "indexmap 2.6.0", + "indexmap 2.7.0", "itertools 0.13.0", "log", "num_cpus", @@ -3266,7 +3311,7 @@ dependencies = [ "chrono", "half", "hashbrown 0.14.5", - "indexmap 2.6.0", + "indexmap 2.7.0", "instant", "libc", "num_cpus", @@ -3324,7 +3369,7 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr-common", - "indexmap 2.6.0", + "indexmap 2.7.0", "paste", "serde_json", "sqlparser", @@ -3387,7 +3432,7 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "half", - "indexmap 2.6.0", + "indexmap 2.7.0", "log", "paste", ] @@ -3467,7 +3512,7 @@ dependencies = [ "datafusion-expr", "datafusion-physical-expr", "hashbrown 0.14.5", - "indexmap 2.6.0", + "indexmap 2.7.0", "itertools 0.13.0", "log", "paste", @@ -3495,7 +3540,7 @@ dependencies = [ "datafusion-physical-expr-common", "half", "hashbrown 0.14.5", - "indexmap 2.6.0", + "indexmap 2.7.0", "itertools 0.13.0", "log", "paste", @@ -3557,7 +3602,7 @@ dependencies = [ "futures", "half", "hashbrown 0.14.5", - "indexmap 2.6.0", + "indexmap 2.7.0", "itertools 0.13.0", "log", "once_cell", @@ -3580,7 +3625,7 @@ dependencies = [ "datafusion-expr", "datafusion-proto-common", "object_store", - "prost 0.13.3", + "prost 0.13.4", ] [[package]] @@ -3593,7 +3638,7 @@ dependencies = [ "chrono", "datafusion-common", "object_store", - "prost 0.13.3", + "prost 0.13.4", ] [[package]] @@ -3607,7 +3652,7 @@ dependencies = [ "arrow-schema", "datafusion-common", "datafusion-expr", - "indexmap 2.6.0", + "indexmap 2.7.0", "log", 
"regex", "sqlparser", @@ -3622,7 +3667,7 @@ dependencies = [ "arc-swap", "binrw", "chrono", - "clap 4.5.20", + "clap 4.5.23", "crc32c", "criterion", "crossbeam", @@ -3676,7 +3721,7 @@ dependencies = [ "tarpc", "tempfile", "textwrap 0.15.2", - "thiserror 1.0.64", + "thiserror 1.0.69", "time", "tokio", "tracing", @@ -3710,7 +3755,7 @@ dependencies = [ "bytestring", "chrono", "circular-queue", - "clap 4.5.20", + "clap 4.5.23", "colored", "crossbeam", "csv", @@ -3723,6 +3768,7 @@ dependencies = [ "erased-serde", "feldera-adapterlib", "feldera-datagen", + "feldera-iceberg", "feldera-size-of", "feldera-sqllib", "feldera-types", @@ -3736,7 +3782,7 @@ dependencies = [ "google-cloud-pubsub", "governor 0.7.0", "home", - "indexmap 2.6.0", + "indexmap 2.7.0", "jemalloc_pprof", "lazy_static", "metrics", @@ -3749,7 +3795,7 @@ dependencies = [ "nonzero_ext", "num-bigint", "once_cell", - "ordered-float 4.3.0", + "ordered-float 4.5.0", "parquet", "pretty_assertions", "proptest", @@ -3763,7 +3809,7 @@ dependencies = [ "rmp-serde", "rmpv", "rust_decimal_macros", - "rustls 0.23.14", + "rustls 0.23.20", "schema_registry_converter", "serde", "serde_arrow", @@ -3888,15 +3934,19 @@ dependencies = [ "chrono", "delta_kernel_derive", "fix-hidden-lifetime-bug", - "indexmap 2.6.0", + "futures", + "indexmap 2.7.0", "itertools 0.13.0", + "object_store", "parquet", + "reqwest 0.12.9", "roaring", "rustc_version", "serde", "serde_json", "strum", - "thiserror 1.0.64", + "thiserror 1.0.69", + "tokio", "tracing", "url", "uuid", @@ -3934,10 +3984,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ed809d88a572dd0812aaae27b70edec133cb8ac8d1faa34731cd533c4fa662e" dependencies = [ "async-trait", - "aws-config 1.5.8", + "aws-config 1.5.10", "aws-credential-types 1.2.1", "aws-sdk-dynamodb", - "aws-sdk-sts 1.46.0", + "aws-sdk-sts 1.51.0", "aws-smithy-runtime-api", "backon", "bytes", @@ -3948,7 +3998,7 @@ dependencies = [ "maplit", "object_store", "regex", - "thiserror 2.0.3", 
+ "thiserror 2.0.7", "tokio", "tracing", "url", @@ -3968,7 +4018,7 @@ dependencies = [ "lazy_static", "object_store", "regex", - "thiserror 2.0.3", + "thiserror 2.0.7", "tokio", "tracing", "url", @@ -3976,9 +4026,9 @@ dependencies = [ [[package]] name = "deltalake-core" -version = "0.22.2" +version = "0.22.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f254e14de9b1ff46419253c83ddfc65bc1bb254825ba0d28dd8ce9e38c6782d" +checksum = "e04f2ca590af391e0bcb683d7c24f72a60881dfd320809959ea0af9891b2f1e6" dependencies = [ "arrow", "arrow-arith", @@ -4011,7 +4061,7 @@ dependencies = [ "fix-hidden-lifetime-bug", "futures", "hashbrown 0.15.2", - "indexmap 2.6.0", + "indexmap 2.7.0", "itertools 0.13.0", "lazy_static", "libc", @@ -4031,7 +4081,7 @@ dependencies = [ "serde", "serde_json", "sqlparser", - "thiserror 2.0.3", + "thiserror 2.0.7", "tokio", "tracing", "url", @@ -4053,7 +4103,7 @@ dependencies = [ "lazy_static", "object_store", "regex", - "thiserror 2.0.3", + "thiserror 2.0.7", "tokio", "tracing", "url", @@ -4081,15 +4131,46 @@ dependencies = [ [[package]] name = "derive_arbitrary" -version = "1.3.2" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30542c1ad912e0e3d22a1935c290e12e8a29d704a420177a31faad4a601a0800" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", +] + +[[package]] +name = "derive_builder" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" dependencies = [ + "darling 0.20.10", "proc-macro2", "quote", "syn 2.0.90", ] 
+[[package]] +name = "derive_builder_macro" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" +dependencies = [ + "derive_builder_core", + "syn 2.0.90", +] + [[package]] name = "derive_more" version = "0.99.18" @@ -4143,6 +4224,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", + "const-oid", "crypto-common", "subtle", ] @@ -4214,6 +4296,15 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0688c2a7f92e427f44895cd63841bff7b29f8d7a1648b9e7e07a4a365b2e1257" +[[package]] +name = "dlv-list" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "442039f5147480ba31067cb00ada1adae6892028e40e45fc5de7b7df6dcc1b5f" +dependencies = [ + "const-random", +] + [[package]] name = "downcast" version = "0.11.0" @@ -4306,15 +4397,15 @@ dependencies = [ [[package]] name = "encode_unicode" -version = "0.3.6" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" [[package]] name = "encoding_rs" -version = "0.8.34" +version = "0.8.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" dependencies = [ "cfg-if", ] @@ -4411,12 +4502,12 @@ dependencies = [ [[package]] name = "errno" -version = "0.3.9" +version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +checksum = 
"33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -4444,9 +4535,9 @@ dependencies = [ [[package]] name = "event-listener-strategy" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f214dc438f977e6d4e3500aaa277f5ad94ca83fbbd9b1a15713ce2344ccc5a1" +checksum = "3c3e4e0dd3673c1139bf041f3008816d9cf2946bbfac2945c09e523b8d7b05b2" dependencies = [ "event-listener 5.3.1", "pin-project-lite", @@ -4459,7 +4550,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d391ba4af7f1d93f01fcf7b2f29e2bc9348e109dfdbf4dcbdc51dfa38dab0b6" dependencies = [ "deunicode", - "http 1.1.0", + "http 1.2.0", "rand", "url-escape", ] @@ -4481,9 +4572,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.1.1" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "fd-lock" @@ -4501,7 +4592,7 @@ name = "fda" version = "0.32.0" dependencies = [ "chrono", - "clap 4.5.20", + "clap 4.5.23", "clap_complete", "directories", "env_logger 0.11.5", @@ -4532,7 +4623,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e182f7dbc2ef73d9ef67351c5fbbea084729c48362d3ce9dd44c28e32e277fe5" dependencies = [ "libc", - "thiserror 1.0.64", + "thiserror 1.0.69", ] [[package]] @@ -4584,6 +4675,26 @@ dependencies = [ "xxhash-rust", ] +[[package]] +name = "feldera-iceberg" +version = "0.1.0" +dependencies = [ + "anyhow", + "chrono", + "datafusion", + "dbsp", + "feldera-adapterlib", + "feldera-types", + "futures-util", + "iceberg", + "iceberg-catalog-glue", + "iceberg-catalog-rest", + "iceberg-datafusion", + "log", + "serde_json", + "tokio", +] + [[package]] name = "feldera-ijson" 
version = "0.1.4" @@ -4638,7 +4749,7 @@ dependencies = [ "rkyv", "serde", "serde_json", - "thiserror 1.0.64", + "thiserror 1.0.69", ] [[package]] @@ -4738,6 +4849,12 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +[[package]] +name = "flagset" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3ea1ec5f8307826a5b71094dd91fc04d4ae75d5709b20ad351c7fb4815c86ec" + [[package]] name = "flatbuffers" version = "24.3.25" @@ -4750,9 +4867,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.34" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1b589b4dc103969ad3cf85c950899926ec64300a1a46d76c03a6072957036f0" +checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" dependencies = [ "crc32fast", "miniz_oxide", @@ -4868,11 +4985,11 @@ checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" [[package]] name = "futures-lite" -version = "2.3.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52527eb5074e35e9339c6b4e8d12600c7128b68fb25dcb9fa9dec18f7c25f3a5" +checksum = "cef40d21ae2c515b51041df9ed313ed21e572df340ea58a922a0aefe7e8891a1" dependencies = [ - "fastrand 2.1.1", + "fastrand 2.3.0", "futures-core", "futures-io", "parking", @@ -4954,9 +5071,9 @@ dependencies = [ [[package]] name = "geo-types" -version = "0.7.13" +version = "0.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ff16065e5720f376fbced200a5ae0f47ace85fd70b7e54269790281353b6d61" +checksum = "b6f47c611187777bbca61ea7aba780213f5f3441fd36294ab333e96cfa791b65" dependencies = [ "approx", "num-traits", @@ -4979,7 +5096,7 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" dependencies = [ - "unicode-width", + "unicode-width 0.1.11", ] [[package]] @@ -5021,9 +5138,9 @@ dependencies = [ [[package]] name = "google-cloud-auth" -version = "0.17.1" +version = "0.17.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357160f51a60ec3e32169ad687f4abe0ee1e90c73b449aa5d11256c4f1cf2ff6" +checksum = "e57a13fbacc5e9c41ded3ad8d0373175a6b7a6ad430d99e89d314ac121b7ab06" dependencies = [ "async-trait", "base64 0.21.7", @@ -5031,10 +5148,10 @@ dependencies = [ "google-cloud-token", "home", "jsonwebtoken 9.3.0", - "reqwest 0.12.8", + "reqwest 0.12.9", "serde", "serde_json", - "thiserror 1.0.64", + "thiserror 1.0.69", "time", "tokio", "tracing", @@ -5043,13 +5160,13 @@ dependencies = [ [[package]] name = "google-cloud-gax" -version = "0.19.1" +version = "0.19.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c929076122a1839455cfe6c030278f10a400dd4dacc11d2ca46c20c47dc05996" +checksum = "de13e62d7e0ffc3eb40a0113ddf753cf6ec741be739164442b08893db4f9bfca" dependencies = [ "google-cloud-token", - "http 1.1.0", - "thiserror 1.0.64", + "http 1.2.0", + "thiserror 1.0.69", "tokio", "tokio-retry2", "tonic", @@ -5063,7 +5180,7 @@ version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ae8ab26ef7c7c3f7dfb9cc3982293d031d8e78c85d00ddfb704b5c35aeff7c8" dependencies = [ - "prost 0.13.3", + "prost 0.13.4", "prost-types", "tonic", ] @@ -5074,8 +5191,8 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04f945a208886a13d07636f38fb978da371d0abc3e34bad338124b9f8c135a8f" dependencies = [ - "reqwest 0.12.8", - "thiserror 1.0.64", + "reqwest 0.12.9", + "thiserror 1.0.69", "tokio", ] @@ -5092,7 +5209,7 @@ dependencies = [ "google-cloud-googleapis", "google-cloud-token", "prost-types", - "thiserror 1.0.64", + "thiserror 1.0.69", "tokio", "tokio-util", "tracing", @@ 
-5171,7 +5288,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.6.0", + "indexmap 2.7.0", "slab", "tokio", "tokio-util", @@ -5180,17 +5297,17 @@ dependencies = [ [[package]] name = "h2" -version = "0.4.6" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205" +checksum = "ccae279728d634d083c00f6099cb58f01cc99c145b84b8be2f6c74618d79922e" dependencies = [ "atomic-waker", "bytes", "fnv", "futures-core", "futures-sink", - "http 1.1.0", - "indexmap 2.6.0", + "http 1.2.0", + "indexmap 2.7.0", "slab", "tokio", "tokio-util", @@ -5326,11 +5443,11 @@ dependencies = [ [[package]] name = "home" -version = "0.5.9" +version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -5346,9 +5463,9 @@ dependencies = [ [[package]] name = "http" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" +checksum = "f16ca2af56261c99fba8bac40a10251ce8188205a4c448fbb745a2e4daa76fea" dependencies = [ "bytes", "fnv", @@ -5373,7 +5490,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http 1.1.0", + "http 1.2.0", ] [[package]] @@ -5384,7 +5501,7 @@ checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" dependencies = [ "bytes", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", "pin-project-lite", ] @@ -5415,9 +5532,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name 
= "hyper" -version = "0.14.31" +version = "0.14.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c08302e8fa335b151b788c775ff56e7a03ae64ff85c548ee820fecb70356e85" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" dependencies = [ "bytes", "futures-channel", @@ -5439,15 +5556,15 @@ dependencies = [ [[package]] name = "hyper" -version = "1.5.0" +version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbbff0a806a4728c99295b254c8838933b5b082d75e3cb70c8dab21fdfbcfa9a" +checksum = "256fb8d4bd6413123cc9d91832d78325c48ff41677595be797d90f42969beae0" dependencies = [ "bytes", "futures-channel", "futures-util", - "h2 0.4.6", - "http 1.1.0", + "h2 0.4.7", + "http 1.2.0", "http-body 1.0.1", "httparse", "httpdate", @@ -5465,7 +5582,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1788965e61b367cd03a62950836d5cd41560c3577d90e40e0819373194d1661c" dependencies = [ "http 0.2.12", - "hyper 0.14.31", + "hyper 0.14.32", "log", "rustls 0.20.9", "rustls-native-certs 0.6.3", @@ -5481,7 +5598,7 @@ checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" dependencies = [ "futures-util", "http 0.2.12", - "hyper 0.14.31", + "hyper 0.14.32", "log", "rustls 0.21.12", "rustls-native-certs 0.6.3", @@ -5496,26 +5613,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08afdbb5c31130e3034af566421053ab03787c640246a446327f550d11bcb333" dependencies = [ "futures-util", - "http 1.1.0", - "hyper 1.5.0", + "http 1.2.0", + "hyper 1.5.2", "hyper-util", "log", - "rustls 0.23.14", - "rustls-native-certs 0.8.0", + "rustls 0.23.20", + "rustls-native-certs 0.8.1", "rustls-pki-types", "tokio", - "tokio-rustls 0.26.0", + "tokio-rustls 0.26.1", "tower-service", "webpki-roots", ] [[package]] name = "hyper-timeout" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"3203a961e5c83b6f5498933e78b6b263e208c197b63e9c6c53cc82ffd3f63793" +checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" dependencies = [ - "hyper 1.5.0", + "hyper 1.5.2", "hyper-util", "pin-project-lite", "tokio", @@ -5529,7 +5646,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" dependencies = [ "bytes", - "hyper 0.14.31", + "hyper 0.14.32", "native-tls", "tokio", "tokio-native-tls", @@ -5543,7 +5660,7 @@ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" dependencies = [ "bytes", "http-body-util", - "hyper 1.5.0", + "hyper 1.5.2", "hyper-util", "native-tls", "tokio", @@ -5553,16 +5670,16 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.9" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41296eb09f183ac68eec06e03cdbea2e759633d4067b2f6552fc2e009bcad08b" +checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" dependencies = [ "bytes", "futures-channel", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", - "hyper 1.5.0", + "hyper 1.5.2", "pin-project-lite", "socket2", "tokio", @@ -5594,54 +5711,281 @@ dependencies = [ ] [[package]] -name = "ident_case" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" - -[[package]] -name = "idna" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +name = "iceberg" +version = "0.3.0" +source = "git+https://github.com/apache/iceberg-rust.git?rev=2e0b646#2e0b64646fcfbd909788236a251a3a374a193542" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "anyhow", + "apache-avro", + "array-init", + "arrow-arith", + "arrow-array", + "arrow-cast", + 
"arrow-ord", + "arrow-schema", + "arrow-select", + "arrow-string", + "async-trait", + "bimap", + "bitvec", + "bytes", + "chrono", + "derive_builder", + "fnv", + "futures", + "itertools 0.13.0", + "moka", + "murmur3", + "num-bigint", + "once_cell", + "opendal", + "ordered-float 4.5.0", + "parquet", + "paste", + "rand", + "reqwest 0.12.9", + "rust_decimal", + "serde", + "serde_bytes", + "serde_derive", + "serde_json", + "serde_repr", + "serde_with", + "tokio", + "typed-builder 0.20.0", + "url", + "uuid", + "zstd 0.13.2", ] [[package]] -name = "impl-more" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e658178c10c747241199382079c0f195ce229866fbf4aa0d46fa6107fe33d2ec" - -[[package]] -name = "impl-trait-for-tuples" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11d7a9f6330b71fea57921c9b61c47ee6e84f72d394754eff6163ae67e7395eb" +name = "iceberg-catalog-glue" +version = "0.3.0" +source = "git+https://github.com/apache/iceberg-rust.git?rev=2e0b646#2e0b64646fcfbd909788236a251a3a374a193542" dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", + "anyhow", + "async-trait", + "aws-config 1.5.10", + "aws-sdk-glue", + "iceberg", + "log", + "serde_json", + "tokio", + "typed-builder 0.20.0", + "uuid", ] [[package]] -name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +name = "iceberg-catalog-rest" +version = "0.3.0" +source = "git+https://github.com/apache/iceberg-rust.git?rev=2e0b646#2e0b64646fcfbd909788236a251a3a374a193542" dependencies = [ - "autocfg", - "hashbrown 0.12.3", + "async-trait", + "chrono", + "http 1.2.0", + "iceberg", + "itertools 0.13.0", + "log", + "reqwest 0.12.9", "serde", + "serde_derive", + "serde_json", + "tokio", + "typed-builder 0.20.0", + "uuid", ] [[package]] -name = "indexmap" -version = "2.6.0" +name = 
"iceberg-datafusion" +version = "0.3.0" +source = "git+https://github.com/apache/iceberg-rust.git?rev=2e0b646#2e0b64646fcfbd909788236a251a3a374a193542" +dependencies = [ + "anyhow", + "async-trait", + "datafusion", + "futures", + "iceberg", + "tokio", +] + +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name 
= "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", +] + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "idna" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + 
+[[package]] +name = "impl-more" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aae21c3177a27788957044151cc2800043d127acaa460a47ebb9b84dfa2c6aa0" + +[[package]] +name = "impl-trait-for-tuples" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0eb5a3343abf848c0984fe4604b2b105da9539376e24fc0a3b0007411ae4fd9" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", +] + +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", + "serde", +] + +[[package]] +name = "indexmap" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f" dependencies = [ "equivalent", "hashbrown 0.15.2", @@ -5650,15 +5994,15 @@ dependencies = [ [[package]] name = "indicatif" -version = "0.17.8" +version = "0.17.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3" +checksum = "cbf675b85ed934d3c67b5c5469701eec7db22689d0a2139d856e0925fa28b281" dependencies = [ "console", - "instant", "number_prefix", "portable-atomic", - "unicode-width", + "unicode-width 0.2.0", + "web-time", ] [[package]] @@ -5674,7 +6018,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "232929e1d75fe899576a3d5c7416ad0d88dbfbb3c3d6aa00873a7408a50ddb88" dependencies = [ "ahash 0.8.11", - "indexmap 2.6.0", + "indexmap 2.7.0", "is-terminal", "itoa", "log", @@ -5792,9 +6136,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.11" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +checksum = 
"d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" [[package]] name = "jemalloc_pprof" @@ -5826,10 +6170,11 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.72" +version = "0.3.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" +checksum = "6717b6b5b077764fb5966237269cb3c64edddde4b14ce42647430a78ced9e7b7" dependencies = [ + "once_cell", "wasm-bindgen", ] @@ -5900,9 +6245,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "lexical-core" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0431c65b318a590c1de6b8fd6e72798c92291d27762d94c9e6c37ed7a73d8458" +checksum = "b765c31809609075565a70b4b71402281283aeda7ecaf4818ac14a7b2ade8958" dependencies = [ "lexical-parse-float", "lexical-parse-integer", @@ -5913,9 +6258,9 @@ dependencies = [ [[package]] name = "lexical-parse-float" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb17a4bdb9b418051aa59d41d65b1c9be5affab314a872e5ad7f06231fb3b4e0" +checksum = "de6f9cb01fb0b08060209a057c048fcbab8717b4c1ecd2eac66ebfe39a65b0f2" dependencies = [ "lexical-parse-integer", "lexical-util", @@ -5924,9 +6269,9 @@ dependencies = [ [[package]] name = "lexical-parse-integer" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5df98f4a4ab53bf8b175b363a34c7af608fe31f93cc1fb1bf07130622ca4ef61" +checksum = "72207aae22fc0a121ba7b6d479e42cbfea549af1479c3f3a4f12c70dd66df12e" dependencies = [ "lexical-util", "static_assertions", @@ -5934,18 +6279,18 @@ dependencies = [ [[package]] name = "lexical-util" -version = "1.0.3" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85314db53332e5c192b6bca611fb10c114a80d1b831ddac0af1e9be1b9232ca0" 
+checksum = "5a82e24bf537fd24c177ffbbdc6ebcc8d54732c35b50a3f28cc3f4e4c949a0b3" dependencies = [ "static_assertions", ] [[package]] name = "lexical-write-float" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e7c3ad4e37db81c1cbe7cf34610340adc09c322871972f74877a712abc6c809" +checksum = "c5afc668a27f460fb45a81a757b6bf2f43c2d7e30cb5a2dcd3abf294c78d62bd" dependencies = [ "lexical-util", "lexical-write-integer", @@ -5954,9 +6299,9 @@ dependencies = [ [[package]] name = "lexical-write-integer" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb89e9f6958b83258afa3deed90b5de9ef68eef090ad5086c791cd2345610162" +checksum = "629ddff1a914a836fb245616a7888b62903aae58fa771e1d83943035efa0f978" dependencies = [ "lexical-util", "static_assertions", @@ -5964,9 +6309,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.159" +version = "0.2.168" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5" +checksum = "5aaeb2981e0606ca11d79718f8bb01164f1d6ed75080182d3abf017e6d244b6d" [[package]] name = "libflate" @@ -5994,9 +6339,9 @@ dependencies = [ [[package]] name = "libloading" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" +checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" dependencies = [ "cfg-if", "windows-targets 0.52.6", @@ -6004,9 +6349,9 @@ dependencies = [ [[package]] name = "libm" -version = "0.2.8" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" +checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" [[package]] name = "libredox" @@ -6043,6 +6388,12 
@@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "litemap" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" + [[package]] name = "local-channel" version = "0.1.5" @@ -6181,15 +6532,15 @@ checksum = "b4f0c8427b39666bf970460908b213ec09b3b350f20c0c2eabcbba51704a08e6" dependencies = [ "base64 0.22.1", "http-body-util", - "hyper 1.5.0", + "hyper 1.5.2", "hyper-rustls 0.27.3", "hyper-util", - "indexmap 2.6.0", + "indexmap 2.7.0", "ipnet", "metrics", "metrics-util", "quanta", - "thiserror 1.0.64", + "thiserror 1.0.69", "tokio", "tracing", ] @@ -6204,10 +6555,10 @@ dependencies = [ "crossbeam-epoch", "crossbeam-utils", "hashbrown 0.14.5", - "indexmap 2.6.0", + "indexmap 2.7.0", "metrics", "num_cpus", - "ordered-float 4.3.0", + "ordered-float 4.5.0", "quanta", "radix_trie", "sketches-ddsketch", @@ -6304,23 +6655,16 @@ dependencies = [ [[package]] name = "mio" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" +checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" dependencies = [ - "hermit-abi 0.3.9", "libc", "log", "wasi", "windows-sys 0.52.0", ] -[[package]] -name = "mirai-annotations" -version = "1.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9be0862c1b3f26a88803c4a49de6889c10e608b3ee9344e6ef5b45fb37ad3d1" - [[package]] name = "mockall" version = "0.12.1" @@ -6348,6 +6692,36 @@ dependencies = [ "syn 2.0.90", ] +[[package]] +name = "moka" +version = "0.12.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cf62eb4dd975d2dde76432fb1075c49e3ee2331cf36f1f8fd4b66550d32b6f" +dependencies = [ + 
"async-lock", + "async-trait", + "crossbeam-channel", + "crossbeam-epoch", + "crossbeam-utils", + "event-listener 5.3.1", + "futures-util", + "once_cell", + "parking_lot", + "quanta", + "rustc_version", + "smallvec", + "tagptr", + "thiserror 1.0.69", + "triomphe", + "uuid", +] + +[[package]] +name = "murmur3" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9252111cf132ba0929b6f8e030cac2a24b507f3a4d6db6fb2896f27b354c714b" + [[package]] name = "native-tls" version = "0.2.12" @@ -6360,7 +6734,7 @@ dependencies = [ "openssl-probe", "openssl-sys", "schannel", - "security-framework", + "security-framework 2.11.1", "security-framework-sys", "tempfile", ] @@ -6563,34 +6937,13 @@ dependencies = [ "libc", ] -[[package]] -name = "num_enum" -version = "0.5.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f646caf906c20226733ed5b1374287eb97e3c2a5c227ce668c1f2ce20ae57c9" -dependencies = [ - "num_enum_derive 0.5.11", -] - [[package]] name = "num_enum" version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4e613fc340b2220f734a8595782c551f1250e969d87d3be1ae0579e8d4065179" dependencies = [ - "num_enum_derive 0.7.3", -] - -[[package]] -name = "num_enum_derive" -version = "0.5.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcbff9bc912032c62bf65ef1d5aea88983b420f4f839db1e9b0c281a25c9c799" -dependencies = [ - "proc-macro-crate 1.3.1", - "proc-macro2", - "quote", - "syn 1.0.109", + "num_enum_derive", ] [[package]] @@ -6599,7 +6952,7 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af1844ef2428cc3e1cb900be36181049ef3d3193c63e43026cfe202983b27a56" dependencies = [ - "proc-macro-crate 3.2.0", + "proc-macro-crate", "proc-macro2", "quote", "syn 2.0.90", @@ -6632,14 +6985,14 @@ dependencies = [ "chrono", "futures", "humantime", - "hyper 1.5.0", + "hyper 1.5.2", "itertools 0.13.0", "md-5", 
"parking_lot", "percent-encoding", "quick-xml 0.36.2", "rand", - "reqwest 0.12.8", + "reqwest 0.12.9", "ring 0.17.8", "rustls-pemfile 2.2.0", "serde", @@ -6669,16 +7022,46 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cc02deea53ffe807708244e5914f6b099ad7015a207ee24317c22112e17d9c5c" dependencies = [ - "indexmap 2.6.0", + "indexmap 2.7.0", + "serde", + "serde_json", +] + +[[package]] +name = "opendal" +version = "0.50.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb28bb6c64e116ceaf8dd4e87099d3cfea4a58e85e62b104fef74c91afba0f44" +dependencies = [ + "anyhow", + "async-trait", + "backon", + "base64 0.22.1", + "bytes", + "chrono", + "crc32c", + "flagset", + "futures", + "getrandom", + "http 1.2.0", + "log", + "md-5", + "once_cell", + "percent-encoding", + "quick-xml 0.36.2", + "reqsign", + "reqwest 0.12.9", "serde", "serde_json", + "tokio", + "uuid", ] [[package]] name = "openssl" -version = "0.10.66" +version = "0.10.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9529f4786b70a3e8c61e11179af17ab6188ad8d0ded78c5529441ed39d4bd9c1" +checksum = "6174bc48f102d208783c2c84bf931bb75927a617866870de8a4ea85597f871f5" dependencies = [ "bitflags 2.6.0", "cfg-if", @@ -6708,18 +7091,18 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-src" -version = "300.3.2+3.3.2" +version = "300.4.1+3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a211a18d945ef7e648cc6e0058f4c548ee46aab922ea203e0d30e966ea23647b" +checksum = "faa4eac4138c62414b5622d1b31c5c304f34b406b013c079c2bbc652fdd6678c" dependencies = [ "cc", ] [[package]] name = "openssl-sys" -version = "0.9.103" +version = "0.9.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f9e8deee91df40a943c71b917e5874b951d32a802526c85721ce3b776c929d6" +checksum = 
"45abf306cbf99debc8195b66b7346498d7b10c210de50418b5ccd7ceba08c741" dependencies = [ "cc", "libc", @@ -6750,7 +7133,7 @@ dependencies = [ "js-sys", "once_cell", "pin-project-lite", - "thiserror 1.0.64", + "thiserror 1.0.69", ] [[package]] @@ -6768,7 +7151,7 @@ dependencies = [ "opentelemetry_api", "percent-encoding", "rand", - "thiserror 1.0.64", + "thiserror 1.0.69", ] [[package]] @@ -6800,9 +7183,9 @@ dependencies = [ [[package]] name = "ordered-float" -version = "4.3.0" +version = "4.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d501f1a72f71d3c063a6bbc8f7271fa73aa09fe5d6283b6571e2ed176a2537" +checksum = "c65ee1f9701bf938026630b455d5315f490640234259037edb259798b3bcf85e" dependencies = [ "num-traits", "rand", @@ -6815,10 +7198,20 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccd746e37177e1711c20dd619a1620f34f5c8b569c53590a72dedd5344d8924a" dependencies = [ - "dlv-list", + "dlv-list 0.3.0", "hashbrown 0.12.3", ] +[[package]] +name = "ordered-multimap" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49203cdcae0030493bad186b28da2fa25645fa276a51b6fec8010d281e02ef79" +dependencies = [ + "dlv-list 0.5.2", + "hashbrown 0.14.5", +] + [[package]] name = "os_pipe" version = "1.2.1" @@ -6899,7 +7292,7 @@ dependencies = [ "ansitok", "bytecount", "fnv", - "unicode-width", + "unicode-width 0.1.11", ] [[package]] @@ -7053,7 +7446,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ "fixedbitset", - "indexmap 2.6.0", + "indexmap 2.7.0", ] [[package]] @@ -7062,8 +7455,8 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e024e787d4b99c9f2c1e7f84e0da059ad4ca935e60916a830b5a3e0030d5aa1" dependencies = [ - "rust-ini", - "thiserror 1.0.64", + "rust-ini 0.18.0", + "thiserror 1.0.69", 
"tokio-postgres", ] @@ -7080,7 +7473,7 @@ dependencies = [ "lazy_static", "log", "reqwest 0.11.27", - "thiserror 1.0.64", + "thiserror 1.0.69", "tokio", "zip 0.6.6", ] @@ -7125,18 +7518,18 @@ dependencies = [ [[package]] name = "pin-project" -version = "1.1.6" +version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf123a161dde1e524adf36f90bc5d8d3462824a9c43553ad07a8183161189ec" +checksum = "be57f64e946e500c8ee36ef6331845d40a93055567ec57e8fae13efd33759b95" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.6" +version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4502d8515ca9f32f1fb543d987f63d95a14934883db45bdb48060b6b69257f8" +checksum = "3c0f5fad0874fc7abcd4d750e76917eaebbecaa2c20bde22e1dbeeba8beb758c" dependencies = [ "proc-macro2", "quote", @@ -7145,9 +7538,9 @@ dependencies = [ [[package]] name = "pin-project-lite" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" +checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" [[package]] name = "pin-utils" @@ -7175,7 +7568,7 @@ dependencies = [ "cached 0.43.0", "change-detection", "chrono", - "clap 4.5.20", + "clap 4.5.23", "colored", "deadpool-postgres", "dirs 5.0.1", @@ -7200,16 +7593,16 @@ dependencies = [ "refinery", "regex", "reqwest 0.11.27", - "rustls 0.23.14", + "rustls 0.23.20", "serde", "serde_json", "serde_yaml", - "serial_test 3.1.1", + "serial_test 3.2.0", "static-files", "static_assertions", "tempfile", "termbg", - "thiserror 1.0.64", + "thiserror 1.0.69", "tokio", "tokio-postgres", "tokio-stream", @@ -7228,7 +7621,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96c8c490f422ef9a4efd2cb5b42b76c8613d7e7dfc1caf667b8a3350a5acc066" dependencies = [ "atomic-waker", - "fastrand 2.1.1", + 
"fastrand 2.3.0", "futures-io", ] @@ -7284,9 +7677,9 @@ dependencies = [ [[package]] name = "polling" -version = "3.7.3" +version = "3.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc2790cd301dec6cd3b7a025e4815cf825724a51c98dccfe6a3e55f05ffb6511" +checksum = "a604568c3202727d1507653cb121dbd627a58684eb09a820fd746bee38b4442f" dependencies = [ "cfg-if", "concurrent-queue", @@ -7299,9 +7692,9 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc9c68a3f6da06753e9335d63e27f6b9754dd1920d941135b7ea8224f141adb2" +checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" [[package]] name = "postgres-protocol" @@ -7361,7 +7754,7 @@ dependencies = [ "smallvec", "symbolic-demangle", "tempfile", - "thiserror 1.0.64", + "thiserror 1.0.69", ] [[package]] @@ -7411,9 +7804,9 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.22" +version = "0.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "479cf940fbbb3426c32c5d5176f62ad57549a0bb84773423ba8be9d089f5faba" +checksum = "64d1ec885c64d0457d564db4ec299b2dae3f9c02808b8ad9c3a089c591b18033" dependencies = [ "proc-macro2", "syn 2.0.90", @@ -7429,23 +7822,13 @@ dependencies = [ "indexmap 1.9.3", ] -[[package]] -name = "proc-macro-crate" -version = "1.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919" -dependencies = [ - "once_cell", - "toml_edit 0.19.15", -] - [[package]] name = "proc-macro-crate" version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecf48c7ca261d60b74ab1a7b20da18bede46776b2e55535cb958eb595c5fa7b" dependencies = [ - "toml_edit 0.22.22", + "toml_edit", ] [[package]] @@ -7530,7 +7913,7 @@ dependencies = [ "getopts", "heck 0.5.0", "http 0.2.12", - "indexmap 
2.6.0", + "indexmap 2.7.0", "openapiv3", "proc-macro2", "quote", @@ -7539,7 +7922,7 @@ dependencies = [ "serde", "serde_json", "syn 2.0.90", - "thiserror 1.0.64", + "thiserror 1.0.69", "typify", "unicode-ident", ] @@ -7564,9 +7947,9 @@ dependencies = [ [[package]] name = "proptest" -version = "1.5.0" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4c2511913b88df1637da85cc8d96ec8e43a3f8bb8ccb71ee1ac240d6f3df58d" +checksum = "14cae93065090804185d3b75f0bf93b8eeda30c7a9b4a33d3bdb3988d6229e50" dependencies = [ "bit-set", "bit-vec", @@ -7584,9 +7967,9 @@ dependencies = [ [[package]] name = "proptest-derive" -version = "0.5.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ff7ff745a347b87471d859a377a9a404361e7efc2a971d73424a6d183c0fc77" +checksum = "4ee1c9ac207483d5e7db4940700de86a9aae46ef90c48b57f99fe7edb8345e49" dependencies = [ "proc-macro2", "quote", @@ -7595,9 +7978,9 @@ dependencies = [ [[package]] name = "proptest-state-machine" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28278d6a11102264b0c569c33dbe5286ba00d2dd6d96ff2a94296e0e5b3d1e04" +checksum = "e943d140e09d07740fb496487c51fb8eb31c70389ac4a2e9dcd8a0d9fdf228d4" dependencies = [ "proptest", ] @@ -7614,12 +7997,12 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.3" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" +checksum = "2c0fef6c4230e4ccf618a35c59d7ede15dea37de8427500f50aff708806e42ec" dependencies = [ "bytes", - "prost-derive 0.13.3", + "prost-derive 0.13.4", ] [[package]] @@ -7637,9 +8020,9 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.13.3" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" +checksum = "157c5a9d7ea5c2ed2d9fb8f495b64759f7816c7eaea54ba3978f0d63000162e3" dependencies = [ "anyhow", "itertools 0.13.0", @@ -7650,11 +8033,11 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.13.3" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670" +checksum = "cc2f1e56baa61e93533aebc21af4d2134b70f66275e0fcdf3cbe43d77ff7e8fc" dependencies = [ - "prost 0.13.3", + "prost 0.13.4", ] [[package]] @@ -7672,7 +8055,7 @@ dependencies = [ "num_cpus", "once_cell", "platforms", - "thiserror 1.0.64", + "thiserror 1.0.69", "unescape", ] @@ -7718,15 +8101,15 @@ dependencies = [ [[package]] name = "quad-rand" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b76f1009795ca44bb5aaae8fd3f18953e209259c33d9b059b1f53d58ab7511db" +checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" [[package]] name = "quanta" -version = "0.12.3" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5167a477619228a0b284fac2674e3c388cba90631d7b7de620e6f1fcd08da5" +checksum = "773ce68d0bb9bc7ef20be3536ffe94e223e1f365bd374108b2659fac0c65cfe6" dependencies = [ "crossbeam-utils", "libc", @@ -7752,6 +8135,16 @@ dependencies = [ "memchr", ] +[[package]] +name = "quick-xml" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86e446ed58cef1bbfe847bc2fda0e2e4ea9f0e57b90c507d4781292590d72a4e" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "quick-xml" version = "0.36.2" @@ -7764,45 +8157,49 @@ dependencies = [ [[package]] name = "quinn" -version = "0.11.5" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c7c5fdde3cdae7203427dc4f0a68fe0ed09833edc525a03456b153b79828684" 
+checksum = "62e96808277ec6f97351a2380e6c25114bc9e67037775464979f3037c92d05ef" dependencies = [ "bytes", "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash 2.0.0", - "rustls 0.23.14", + "rustc-hash 2.1.0", + "rustls 0.23.20", "socket2", - "thiserror 1.0.64", + "thiserror 2.0.7", "tokio", "tracing", ] [[package]] name = "quinn-proto" -version = "0.11.8" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fadfaed2cd7f389d0161bb73eeb07b7b78f8691047a6f3e73caaeae55310a4a6" +checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d" dependencies = [ "bytes", + "getrandom", "rand", "ring 0.17.8", - "rustc-hash 2.0.0", - "rustls 0.23.14", + "rustc-hash 2.1.0", + "rustls 0.23.20", + "rustls-pki-types", "slab", - "thiserror 1.0.64", + "thiserror 2.0.7", "tinyvec", "tracing", + "web-time", ] [[package]] name = "quinn-udp" -version = "0.5.5" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fe68c2e9e1a1234e218683dbdf9f9dfcb094113c5ac2b938dfcb9bab4c4140b" +checksum = "52cd4b1eff68bf27940dd39811292c49e007f4d0b4c357358dc9b0197be6b527" dependencies = [ + "cfg_aliases 0.2.1", "libc", "once_cell", "socket2", @@ -7954,14 +8351,14 @@ dependencies = [ [[package]] name = "rdkafka-sys" -version = "4.7.0+2.3.0" +version = "4.8.0+2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55e0d2f9ba6253f6ec72385e453294f8618e9e15c2c6aba2a5c01ccf9622d615" +checksum = "ced38182dc436b3d9df0c77976f37a67134df26b050df1f0006688e46fc4c8be" dependencies = [ "cmake", "libc", "libz-sys", - "num_enum 0.5.11", + "num_enum", "openssl-sys", "pkg-config", "sasl2-sys", @@ -7985,9 +8382,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.7" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" +checksum = 
"03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" dependencies = [ "bitflags 2.6.0", ] @@ -8000,7 +8397,7 @@ checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ "getrandom", "libredox", - "thiserror 1.0.64", + "thiserror 1.0.69", ] [[package]] @@ -8025,7 +8422,7 @@ dependencies = [ "regex", "serde", "siphasher 1.0.1", - "thiserror 1.0.64", + "thiserror 1.0.69", "time", "tokio", "tokio-postgres", @@ -8050,13 +8447,13 @@ dependencies = [ [[package]] name = "regex" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.8", + "regex-automata 0.4.9", "regex-syntax 0.8.5", ] @@ -8071,9 +8468,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.8" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", @@ -8117,6 +8514,34 @@ dependencies = [ "bytecheck", ] +[[package]] +name = "reqsign" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb0075a66c8bfbf4cc8b70dca166e722e1f55a3ea9250ecbb85f4d92a5f64149" +dependencies = [ + "anyhow", + "async-trait", + "base64 0.22.1", + "chrono", + "form_urlencoded", + "getrandom", + "hex", + "hmac", + "home", + "http 1.2.0", + "log", + "percent-encoding", + "quick-xml 0.35.0", + "rand", + "reqwest 0.12.9", + "rust-ini 0.21.1", + "serde", + "serde_json", + "sha1", + "sha2", +] + [[package]] name = "reqwest" version = "0.11.27" @@ -8131,7 +8556,7 @@ dependencies = [ "h2 0.3.26", "http 0.2.12", "http-body 0.4.6", - "hyper 
0.14.31", + "hyper 0.14.32", "hyper-tls 0.5.0", "ipnet", "js-sys", @@ -8146,7 +8571,7 @@ dependencies = [ "serde_json", "serde_urlencoded", "sync_wrapper 0.1.2", - "system-configuration", + "system-configuration 0.5.1", "tokio", "tokio-native-tls", "tokio-util", @@ -8161,9 +8586,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.8" +version = "0.12.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f713147fbe92361e52392c73b8c9e48c04c6625bce969ef54dc901e58e042a7b" +checksum = "a77c62af46e79de0a562e1a9849205ffcb7fc1238876e9bd743357570e04046f" dependencies = [ "base64 0.22.1", "bytes", @@ -8171,11 +8596,11 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", - "h2 0.4.6", - "http 1.1.0", + "h2 0.4.7", + "http 1.2.0", "http-body 1.0.1", "http-body-util", - "hyper 1.5.0", + "hyper 1.5.2", "hyper-rustls 0.27.3", "hyper-tls 0.6.0", "hyper-util", @@ -8188,17 +8613,18 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.14", - "rustls-native-certs 0.8.0", + "rustls 0.23.20", + "rustls-native-certs 0.8.1", "rustls-pemfile 2.2.0", "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", - "sync_wrapper 1.0.1", + "sync_wrapper 1.0.2", + "system-configuration 0.6.1", "tokio", "tokio-native-tls", - "tokio-rustls 0.26.0", + "tokio-rustls 0.26.1", "tokio-util", "tower-service", "url", @@ -8346,9 +8772,9 @@ dependencies = [ [[package]] name = "roaring" -version = "0.10.6" +version = "0.10.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f4b84ba6e838ceb47b41de5194a60244fac43d9fe03b71dbe8c5a201081d6d1" +checksum = "41589aba99537475bf697f2118357cad1c31590c5a1b9f6d9fc4ad6d07503661" dependencies = [ "bytemuck", "byteorder", @@ -8446,7 +8872,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6d5f2436026b4f6e79dc829837d467cc7e9a55ee40e750d716713540715a2df" dependencies = [ "cfg-if", - "ordered-multimap", + "ordered-multimap 
0.4.3", +] + +[[package]] +name = "rust-ini" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e310ef0e1b6eeb79169a1171daf9abcb87a2e17c03bee2c4bb100b55c75409f" +dependencies = [ + "cfg-if", + "ordered-multimap 0.7.3", + "trim-in-place", ] [[package]] @@ -8456,7 +8893,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b082d80e3e3cc52b2ed634388d436fe1f4de6af5786cc2de9ba9737527bdf555" dependencies = [ "arrayvec 0.7.6", + "borsh", + "bytes", "num-traits", + "rand", + "rkyv", + "serde", + "serde_json", ] [[package]] @@ -8483,9 +8926,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustc-hash" -version = "2.0.0" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" +checksum = "c7fb8039b3032c191086b10f11f319a6e99e1e82889c5cc6046f515c9db1d497" [[package]] name = "rustc_version" @@ -8498,15 +8941,15 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.37" +version = "0.38.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" +checksum = "f93dc38ecbab2eb790ff964bb77fa94faf256fd3e73285fd7ba0903b76bedb85" dependencies = [ "bitflags 2.6.0", "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -8535,9 +8978,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.14" +version = "0.23.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "415d9944693cb90382053259f89fbb077ea730ad7273047ec63b19bc9b160ba8" +checksum = "5065c3f250cbd332cd894be57c40fa52387247659b14a2d6041d121547903b1b" dependencies = [ "aws-lc-rs", "log", @@ -8558,20 +9001,19 @@ dependencies = [ "openssl-probe", "rustls-pemfile 1.0.4", "schannel", - "security-framework", + 
"security-framework 2.11.1", ] [[package]] name = "rustls-native-certs" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcaf18a4f2be7326cd874a5fa579fae794320a0f388d365dca7e480e55f83f8a" +checksum = "7fcff2dd52b58a8d98a70243663a0d234c4e2b79235637849d15913394a247d3" dependencies = [ "openssl-probe", - "rustls-pemfile 2.2.0", "rustls-pki-types", "schannel", - "security-framework", + "security-framework 3.0.1", ] [[package]] @@ -8594,9 +9036,12 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.10.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" +checksum = "d2bf47e6ff922db3825eb750c4e2ff784c6ff8fb9e13046ef6a1d1c5401b0b37" +dependencies = [ + "web-time", +] [[package]] name = "rustls-webpki" @@ -8655,7 +9100,7 @@ dependencies = [ "nix 0.28.0", "radix_trie", "unicode-segmentation", - "unicode-width", + "unicode-width 0.1.11", "utf8parse", "windows-sys 0.52.0", ] @@ -8690,18 +9135,18 @@ dependencies = [ [[package]] name = "scc" -version = "2.2.2" +version = "2.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2c1f7fc6deb21665a9060dfc7d271be784669295a31babdcd4dd2c79ae8cbfb" +checksum = "94b13f8ea6177672c49d12ed964cca44836f59621981b04a3e26b87e675181de" dependencies = [ "sdd", ] [[package]] name = "schannel" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01227be5826fa0690321a2ba6c5cd57a19cf3f6a09e76973b58e61de6ab9d1c1" +checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" dependencies = [ "windows-sys 0.59.0", ] @@ -8716,7 +9161,7 @@ dependencies = [ "byteorder", "dashmap 6.1.0", "futures", - "reqwest 0.12.8", + "reqwest 0.12.9", "serde", "serde_json", ] @@ -8765,9 +9210,9 @@ dependencies = [ [[package]] name = "sdd" -version = "3.0.4" 
+version = "3.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49c1eeaf4b6a87c7479688c6d52b9f1153cedd3c489300564f932b065c6eab95" +checksum = "478f121bb72bbf63c52c93011ea1791dca40140dfe13f8336c4c5ac952c33aa9" [[package]] name = "seahash" @@ -8796,7 +9241,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ "bitflags 2.6.0", - "core-foundation", + "core-foundation 0.9.4", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework" +version = "3.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1415a607e92bec364ea2cf9264646dcce0f91e6d65281bd6f2819cca3bf39c8" +dependencies = [ + "bitflags 2.6.0", + "core-foundation 0.10.0", "core-foundation-sys", "libc", "security-framework-sys", @@ -8804,9 +9262,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.12.0" +version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea4a292869320c0272d7bc55a5a6aafaff59b4f63404a003887b679a2e05b4b6" +checksum = "fa39c7303dc58b5543c94d22c1766b0d31f2ee58306363ea622b10bbc075eaa2" dependencies = [ "core-foundation-sys", "libc", @@ -8814,9 +9272,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.23" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" +checksum = "3cb6eb87a131f756572d7fb904f6e7b68633f09cca868c5df1c4b8d1a694bbba" dependencies = [ "serde", ] @@ -8829,9 +9287,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.213" +version = "1.0.216" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ea7893ff5e2466df8d720bb615088341b295f849602c6956047f8f80f0e9bc1" +checksum = 
"0b9781016e935a97e8beecf0c933758c97a5520d32930e460142b4cd80c6338e" dependencies = [ "serde_derive", ] @@ -8863,9 +9321,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.213" +version = "1.0.216" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e85ad2009c50b58e87caa8cd6dac16bdf511bbfb7af6c33df902396aa480fa5" +checksum = "46f859dbbf73865c6627ed570e78961cd3ac92407a2d117204c49232485da55e" dependencies = [ "proc-macro2", "quote", @@ -8885,9 +9343,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.132" +version = "1.0.133" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" +checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" dependencies = [ "itoa", "memchr", @@ -8895,6 +9353,17 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_repr" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", +] + [[package]] name = "serde_spanned" version = "0.6.8" @@ -8938,7 +9407,7 @@ dependencies = [ "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.6.0", + "indexmap 2.7.0", "serde", "serde_derive", "serde_json", @@ -8964,7 +9433,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.6.0", + "indexmap 2.7.0", "itoa", "ryu", "serde", @@ -8987,16 +9456,16 @@ dependencies = [ [[package]] name = "serial_test" -version = "3.1.1" +version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b4b487fe2acf240a021cf57c6b2b4903b1e78ca0ecd862a71b71d2a51fed77d" +checksum = "1b258109f244e1d6891bf1053a55d63a5cd4f8f4c30cf9a1280989f80e7a1fa9" dependencies 
= [ "futures", "log", "once_cell", "parking_lot", "scc", - "serial_test_derive 3.1.1", + "serial_test_derive 3.2.0", ] [[package]] @@ -9012,9 +9481,9 @@ dependencies = [ [[package]] name = "serial_test_derive" -version = "3.1.1" +version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82fe9db325bcef1fbcde82e078a5cc4efdf787e96b3b9cf45b50b529f2083d67" +checksum = "5d69265a08751de7844521fd15003ae0a888e035773ba05695c5c759a6f89eef" dependencies = [ "proc-macro2", "quote", @@ -9122,7 +9591,7 @@ checksum = "adc4e5204eb1910f40f9cfa375f6f05b68c3abac4b6fd879c8ff5e7ae8a0a085" dependencies = [ "num-bigint", "num-traits", - "thiserror 1.0.64", + "thiserror 1.0.69", "time", ] @@ -9214,9 +9683,9 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "socket2" -version = "0.5.7" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" +checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8" dependencies = [ "libc", "windows-sys 0.52.0", @@ -9359,9 +9828,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "symbolic-common" -version = "12.12.0" +version = "12.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "366f1b4c6baf6cfefc234bbd4899535fca0b06c74443039a73f6dfb2fad88d77" +checksum = "e5ba5365997a4e375660bed52f5b42766475d5bc8ceb1bb13fea09c469ea0f49" dependencies = [ "debugid", "memmap2", @@ -9371,9 +9840,9 @@ dependencies = [ [[package]] name = "symbolic-demangle" -version = "12.12.0" +version = "12.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aba05ba5b9962ea5617baf556293720a8b2d0a282aa14ee4bf10e22efc7da8c8" +checksum = "beff338b2788519120f38c59ff4bb15174f52a183e547bac3d6072c2c0aa48aa" dependencies = [ "cpp_demangle", "rustc-demangle", @@ -9402,18 
+9871,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "syn_derive" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1329189c02ff984e9736652b1631330da25eaa6bc639089ed4915d25446cbe7b" -dependencies = [ - "proc-macro-error", - "proc-macro2", - "quote", - "syn 2.0.90", -] - [[package]] name = "sync_wrapper" version = "0.1.2" @@ -9422,13 +9879,24 @@ checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" [[package]] name = "sync_wrapper" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" dependencies = [ "futures-core", ] +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", +] + [[package]] name = "system-configuration" version = "0.5.1" @@ -9436,8 +9904,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" dependencies = [ "bitflags 1.3.2", - "core-foundation", - "system-configuration-sys", + "core-foundation 0.9.4", + "system-configuration-sys 0.5.0", +] + +[[package]] +name = "system-configuration" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" +dependencies = [ + "bitflags 2.6.0", + "core-foundation 0.9.4", + "system-configuration-sys 0.6.0", ] [[package]] @@ -9450,6 +9929,16 @@ dependencies = [ "libc", ] +[[package]] +name = "system-configuration-sys" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "tabled" version = "0.16.0" @@ -9475,6 +9964,12 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "tagptr" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" + [[package]] name = "tap" version = "1.0.1" @@ -9483,9 +9978,9 @@ checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" [[package]] name = "tar" -version = "0.4.42" +version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ff6c40d3aedb5e06b57c6f669ad17ab063dd1e63d977c6a88e7f4dfa4f04020" +checksum = "c65998313f8e17d0d553d28f91a0df93e4dbbbf770279c7bc21ca0f09ea1a1f6" dependencies = [ "filetime", "libc", @@ -9508,7 +10003,7 @@ dependencies = [ "serde", "static_assertions", "tarpc-plugins", - "thiserror 1.0.64", + "thiserror 1.0.69", "tokio", "tokio-serde", "tokio-util", @@ -9529,12 +10024,12 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.13.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b" +checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" dependencies = [ "cfg-if", - "fastrand 2.1.1", + "fastrand 2.3.0", "once_cell", "rustix", "windows-sys 0.59.0", @@ -9548,7 +10043,7 @@ checksum = "c5a2e3f9dc199dfb67c63ed7fa1157f6728303c4154074bf301a002572b7711a" dependencies = [ "async-std", "crossterm", - "thiserror 1.0.64", + "thiserror 1.0.69", "winapi", ] @@ -9563,9 +10058,9 @@ dependencies = [ [[package]] name = "terminal_size" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f599bd7ca042cfdf8f4512b277c02ba102247820f9d9d4a9f521f496751a6ef" +checksum = 
"5352447f921fda68cf61b4101566c0bdb5104eff6804d0678e5227580ab6a4e9" dependencies = [ "rustix", "windows-sys 0.59.0", @@ -9591,7 +10086,7 @@ checksum = "b7b3e525a49ec206798b40326a44121291b530c963cfb01018f63e135bac543d" dependencies = [ "smawk", "unicode-linebreak", - "unicode-width", + "unicode-width 0.1.11", ] [[package]] @@ -9602,27 +10097,27 @@ checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" [[package]] name = "thiserror" -version = "1.0.64" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl 1.0.64", + "thiserror-impl 1.0.69", ] [[package]] name = "thiserror" -version = "2.0.3" +version = "2.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c006c85c7651b3cf2ada4584faa36773bd07bac24acfb39f3c431b36d7e667aa" +checksum = "93605438cbd668185516ab499d589afb7ee1859ea3d5fc8f6b0755e1c7443767" dependencies = [ - "thiserror-impl 2.0.3", + "thiserror-impl 2.0.7", ] [[package]] name = "thiserror-impl" -version = "1.0.64" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", @@ -9631,9 +10126,9 @@ dependencies = [ [[package]] name = "thiserror-impl" -version = "2.0.3" +version = "2.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f077553d607adc1caf65430528a576c757a71ed73944b66ebb58ef2bbd243568" +checksum = "e1d8749b4531af2117677a5fcd12b1348a3fe2b81e36e61ffeac5c4aa3273e36" dependencies = [ "proc-macro2", "quote", @@ -9694,9 +10189,9 @@ dependencies = [ [[package]] name = "time" -version = "0.3.36" +version = "0.3.37" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" +checksum = "35e7868883861bd0e56d9ac6efcaaca0d6d5d82a2a7ec8209ff492c07cf37b21" dependencies = [ "deranged", "itoa", @@ -9715,9 +10210,9 @@ checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "time-macros" -version = "0.2.18" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" +checksum = "2834e6017e3e5e4b9834939793b282bc03b37a3336245fa820e35e233e2a85de" dependencies = [ "num-conv", "time-core", @@ -9732,6 +10227,16 @@ dependencies = [ "crunchy", ] +[[package]] +name = "tinystr" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tinytemplate" version = "1.2.1" @@ -9759,9 +10264,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.41.0" +version = "1.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145f3413504347a2be84393cc8a7d2fb4d863b375909ea59f2158261aa258bbb" +checksum = "5cec9b21b0450273377fc97bd4c33a8acffc8c996c987a7c5b319a0083707551" dependencies = [ "backtrace", "bytes", @@ -9866,12 +10371,11 @@ dependencies = [ [[package]] name = "tokio-rustls" -version = "0.26.0" +version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" +checksum = "5f6d0975eaace0cf0fcadee4e4aaa5da15b5c079146f2cffb67c113be122bf37" dependencies = [ - "rustls 0.23.14", - "rustls-pki-types", + "rustls 0.23.20", "tokio", ] @@ -9893,9 +10397,9 @@ dependencies = [ [[package]] name = "tokio-stream" -version = "0.1.16" 
+version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f4e6ce100d0eb49a2734f8c0812bcd324cf357d21810932c5df6b96ef2b86f1" +checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" dependencies = [ "futures-core", "pin-project-lite", @@ -9904,9 +10408,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.12" +version = "0.7.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" +checksum = "d7fcaa8d55a2bdd6b83ace262b016eca0d79ee02818c5c1bcdf0305114081078" dependencies = [ "bytes", "futures-core", @@ -9925,7 +10429,7 @@ dependencies = [ "serde", "serde_spanned", "toml_datetime", - "toml_edit 0.22.22", + "toml_edit", ] [[package]] @@ -9937,28 +10441,17 @@ dependencies = [ "serde", ] -[[package]] -name = "toml_edit" -version = "0.19.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" -dependencies = [ - "indexmap 2.6.0", - "toml_datetime", - "winnow 0.5.40", -] - [[package]] name = "toml_edit" version = "0.22.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" dependencies = [ - "indexmap 2.6.0", + "indexmap 2.7.0", "serde", "serde_spanned", "toml_datetime", - "winnow 0.6.20", + "winnow", ] [[package]] @@ -9973,20 +10466,20 @@ dependencies = [ "base64 0.22.1", "bytes", "flate2", - "h2 0.4.6", - "http 1.1.0", + "h2 0.4.7", + "http 1.2.0", "http-body 1.0.1", "http-body-util", - "hyper 1.5.0", + "hyper 1.5.2", "hyper-timeout", "hyper-util", "percent-encoding", "pin-project", - "prost 0.13.3", + "prost 0.13.4", "rustls-pemfile 2.2.0", "socket2", "tokio", - "tokio-rustls 0.26.0", + "tokio-rustls 0.26.1", "tokio-stream", "tower 0.4.13", "tower-layer", @@ -10017,14 +10510,14 @@ dependencies = [ [[package]] name = 
"tower" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2873938d487c3cfb9aed7546dc9f2711d867c9f90c46b889989a2cb84eba6b4f" +checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" dependencies = [ "futures-core", "futures-util", "pin-project-lite", - "sync_wrapper 0.1.2", + "sync_wrapper 1.0.2", "tower-layer", "tower-service", ] @@ -10043,9 +10536,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.40" +version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ "log", "pin-project-lite", @@ -10055,9 +10548,9 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.27" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", @@ -10066,9 +10559,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.32" +version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" dependencies = [ "once_cell", "valuable", @@ -10100,9 +10593,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.18" +version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" dependencies = [ "matchers", 
"nu-ansi-term", @@ -10135,6 +10628,18 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "trim-in-place" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "343e926fc669bc8cde4fa3129ab681c63671bae288b1f1081ceee6d9d37904fc" + +[[package]] +name = "triomphe" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "859eb650cfee7434994602c3a68b25d77ad9e68c8a6cd491616ef86661382eb3" + [[package]] name = "try-lock" version = "0.2.5" @@ -10157,7 +10662,16 @@ version = "0.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a06fbd5b8de54c5f7c91f6fe4cebb949be2125d7758e630bb58b1d831dbce600" dependencies = [ - "typed-builder-macro", + "typed-builder-macro 0.19.1", +] + +[[package]] +name = "typed-builder" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e14ed59dc8b7b26cacb2a92bad2e8b1f098806063898ab42a3bd121d7d45e75" +dependencies = [ + "typed-builder-macro 0.20.0", ] [[package]] @@ -10171,6 +10685,17 @@ dependencies = [ "syn 2.0.90", ] +[[package]] +name = "typed-builder-macro" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "560b82d656506509d43abe30e0ba64c56b1953ab3d4fe7ba5902747a7a3cedd5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", +] + [[package]] name = "typedmap" version = "0.3.1" @@ -10212,7 +10737,7 @@ dependencies = [ "serde", "serde_json", "syn 2.0.90", - "thiserror 1.0.64", + "thiserror 1.0.69", "unicode-ident", ] @@ -10247,24 +10772,21 @@ checksum = "ccb97dac3243214f8d8507998906ca3e2e0b900bf9bf4870477f125b82e68f6e" [[package]] name = "unicase" -version = "2.7.0" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89" -dependencies = [ - "version_check", -] +checksum = 
"7e51b68083f157f853b6379db119d1c1be0e6e4dec98101079dec41f6f5cf6df" [[package]] name = "unicode-bidi" -version = "0.3.17" +version = "0.3.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ab17db44d7388991a428b2ee655ce0c212e862eff1768a455c58f9aad6e7893" +checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" [[package]] name = "unicode-ident" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" +checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" [[package]] name = "unicode-linebreak" @@ -10299,6 +10821,12 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" +[[package]] +name = "unicode-width" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" + [[package]] name = "unicode-xid" version = "0.2.6" @@ -10325,9 +10853,9 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.2" +version = "2.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" dependencies = [ "form_urlencoded", "idna", @@ -10349,6 +10877,18 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" 
+version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "utf8parse" version = "0.2.2" @@ -10361,7 +10901,7 @@ version = "4.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c5afb1a60e207dca502682537fefcfd9921e71d0b83e9576060f09abc6efab23" dependencies = [ - "indexmap 2.6.0", + "indexmap 2.7.0", "serde", "serde_json", "utoipa-gen", @@ -10390,7 +10930,7 @@ dependencies = [ "actix-web", "mime_guess", "regex", - "reqwest 0.12.8", + "reqwest 0.12.9", "rust-embed", "serde", "serde_json", @@ -10401,9 +10941,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" +checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" dependencies = [ "getrandom", "serde", @@ -10423,9 +10963,9 @@ checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" [[package]] name = "value-bag" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a84c137d37ab0142f0f2ddfe332651fdbf252e7b7dbb4e67b6c1f1b2e925101" +checksum = "3ef4c4aa54d5d05a279399bfa921ec387b7aba77caf7a682ae8d86785b8fdad2" [[package]] name = "vcpkg" @@ -10519,9 +11059,9 @@ checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" [[package]] name = "wasm-bindgen" -version = "0.2.95" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" +checksum = "a474f6281d1d70c17ae7aa6a613c87fce69a127e2624002df63dcb39d6cf6396" dependencies = [ "cfg-if", "once_cell", @@ -10530,13 +11070,12 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.95" +version = 
"0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" +checksum = "5f89bb38646b4f81674e8f5c3fb81b562be1fd936d84320f3264486418519c79" dependencies = [ "bumpalo", "log", - "once_cell", "proc-macro2", "quote", "syn 2.0.90", @@ -10545,21 +11084,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.45" +version = "0.4.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc7ec4f8827a71586374db3e87abdb5a2bb3a15afed140221307c3ec06b1f63b" +checksum = "38176d9b44ea84e9184eff0bc34cc167ed044f816accfe5922e54d84cf48eca2" dependencies = [ "cfg-if", "js-sys", + "once_cell", "wasm-bindgen", "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.95" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" +checksum = "2cc6181fd9a7492eef6fef1f33961e3695e4579b9872a6f7c83aee556666d4fe" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -10567,9 +11107,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.95" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" +checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2" dependencies = [ "proc-macro2", "quote", @@ -10580,15 +11120,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.95" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" +checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6" [[package]] name = "wasm-streams" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4e072d4e72f700fb3443d8fe94a39315df013eef1104903cdb0a2abd322bbecd" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" dependencies = [ "futures-util", "js-sys", @@ -10599,9 +11139,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.72" +version = "0.3.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" +checksum = "04dd7223427d52553d3702c004d3b2fe07c148165faa56313cb00211e31c12bc" dependencies = [ "js-sys", "wasm-bindgen", @@ -10629,9 +11169,9 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "0.26.6" +version = "0.26.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "841c67bff177718f1d4dfefde8d8f0e78f9b6589319ba88312f567fc5841a958" +checksum = "5d642ff16b7e79272ae451b7322067cdc17cadf68c23264be9d94a32319efe7e" dependencies = [ "rustls-pki-types", ] @@ -10877,15 +11417,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" -[[package]] -name = "winnow" -version = "0.5.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" -dependencies = [ - "memchr", -] - [[package]] name = "winnow" version = "0.6.20" @@ -10916,9 +11447,9 @@ dependencies = [ "base64 0.22.1", "deadpool 0.10.0", "futures", - "http 1.1.0", + "http 1.2.0", "http-body-util", - "hyper 1.5.0", + "hyper 1.5.2", "hyper-util", "log", "once_cell", @@ -10929,6 +11460,18 @@ dependencies = [ "url", ] +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + [[package]] name = "wyz" version = "0.5.1" @@ -10976,6 +11519,30 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" +[[package]] +name = "yoke" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", + "synstructure", +] + [[package]] name = "z85" version = "3.0.5" @@ -11003,12 +11570,55 @@ dependencies = [ "syn 2.0.90", ] +[[package]] +name = "zerofrom" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", + "synstructure", +] + [[package]] name = "zeroize" version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = 
"0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", +] + [[package]] name = "zip" version = "0.6.6" @@ -11040,9 +11650,9 @@ dependencies = [ "crossbeam-utils", "displaydoc", "flate2", - "indexmap 2.6.0", - "num_enum 0.7.3", - "thiserror 1.0.64", + "indexmap 2.7.0", + "num_enum", + "thiserror 1.0.69", ] [[package]] @@ -11103,9 +11713,9 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "2.0.12+zstd.1.5.6" +version = "2.0.13+zstd.1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a4e40c320c3cb459d9a9ff6de98cff88f4751ee9275d140e2be94a2b74e4c13" +checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" dependencies = [ "cc", "pkg-config", diff --git a/Cargo.toml b/Cargo.toml index df2365de65..bc0ef46d29 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,6 +10,7 @@ members = [ "crates/sqllib", "sql-to-dbsp-compiler/lib/readers", "crates/datagen", + "crates/iceberg" ] exclude = [ "sql-to-dbsp-compiler/temp", diff --git a/Earthfile b/Earthfile index 41c10a3a22..41e1aa6598 100644 --- a/Earthfile +++ b/Earthfile @@ -212,12 +212,16 @@ test-adapters: DO rust+SET_CACHE_MOUNTS_ENV ARG DELTA_TABLE_TEST_AWS_ACCESS_KEY_ID ARG DELTA_TABLE_TEST_AWS_SECRET_ACCESS_KEY + ARG ICEBERG_TEST_AWS_ACCESS_KEY_ID + ARG ICEBERG_TEST_AWS_SECRET_ACCESS_KEY + # Dependencies needed by the Iceberg test + RUN pip3 install -r crates/iceberg/src/test/requirements.ci.txt WITH DOCKER --pull redpandadata/redpanda:v23.3.21 RUN --mount=$EARTHLY_RUST_CARGO_HOME_CACHE --mount=$EARTHLY_RUST_TARGET_CACHE docker run -p 9092:9092 --rm -itd redpandadata/redpanda:v23.3.21 \ redpanda start --smp 2 && \ (google-cloud-sdk/bin/gcloud beta emulators pubsub start --project=feldera-test --host-port=127.0.0.1:8685 &) && \ sleep 5 && \ - RUST_BACKTRACE=1 cargo test --package dbsp_adapters --features 
"pubsub-emulator-test" --package feldera-sqllib + RUST_BACKTRACE=1 cargo test --package dbsp_adapters --features "pubsub-emulator-test,iceberg-tests-fs,iceberg-tests-glue" --package feldera-sqllib END test-manager: diff --git a/crates/adapters/Cargo.toml b/crates/adapters/Cargo.toml index d0cd46ef6f..29456b293a 100644 --- a/crates/adapters/Cargo.toml +++ b/crates/adapters/Cargo.toml @@ -11,9 +11,10 @@ categories = ["database", "api-bindings", "network-programming"] publish = false [features] -default = ["with-kafka", "with-deltalake", "with-avro", "with-nexmark", "with-pubsub"] +default = ["with-kafka", "with-deltalake", "with-iceberg", "with-avro", "with-nexmark", "with-pubsub"] with-kafka = ["rdkafka"] with-deltalake = ["deltalake"] +with-iceberg = ["feldera-iceberg"] with-pubsub = ["google-cloud-pubsub", "google-cloud-gax"] with-avro = ["apache-avro", "schema_registry_converter", "feldera-adapterlib/with-avro"] with-nexmark = ["dbsp_nexmark"] @@ -27,11 +28,15 @@ pubsub-emulator-test = [] # Google Cloud Application Default Credentials (ADC) must be configured. See `pubsub/test.rs`. 
pubsub-gcp-test = [] feldera-enterprise = [] +iceberg-tests-fs = [] +iceberg-tests-glue = [] +iceberg-tests-rest = [] [dependencies] feldera-types = { path = "../feldera-types" } feldera-adapterlib = { path = "../adapterlib" } feldera-datagen = { path = "../datagen" } +feldera-iceberg = { path = "../iceberg", optional = true } awc = { version = "3.1.1", default-features = false, features = ["compress-gzip", "compress-brotli", "cookies", "rustls-0_23-webpki-roots"] } async-stream = "0.3.5" anyhow = { version = "1.0.57", features = ["backtrace"] } @@ -125,7 +130,7 @@ size-of = { version = "0.1.5", package = "feldera-size-of", features = ["time-st tempfile = "3.3.0" proptest = "1.5.0" proptest-derive = "0.5.0" -futures = "0.3.25" +futures = "0.3.30" bytestring = "1.2.0" actix-codec = "0.5.0" async-stream = "0.3.5" diff --git a/crates/adapters/src/integrated/delta_table/input.rs b/crates/adapters/src/integrated/delta_table/input.rs index 82c706e296..50866a485b 100644 --- a/crates/adapters/src/integrated/delta_table/input.rs +++ b/crates/adapters/src/integrated/delta_table/input.rs @@ -575,7 +575,7 @@ impl DeltaTableInputEndpointInner { if let Some(timestamp_column) = &self.config.timestamp_column { validate_timestamp_column( &self.endpoint_name, - ×tamp_column, + timestamp_column, &self.datafusion, schema, "see DeltaLake connector documentation for more details: https://docs.feldera.com/connectors/sources/delta" diff --git a/crates/adapters/src/integrated/mod.rs b/crates/adapters/src/integrated/mod.rs index ae0e3cf387..ad96f74909 100644 --- a/crates/adapters/src/integrated/mod.rs +++ b/crates/adapters/src/integrated/mod.rs @@ -4,13 +4,12 @@ #![allow(unreachable_code)] use crate::controller::{ControllerInner, EndpointId}; +use crate::transport::IntegratedInputEndpoint; use crate::{ControllerError, Encoder, InputConsumer, OutputEndpoint, TransportInputEndpoint}; use feldera_types::config::{InputEndpointConfig, OutputEndpointConfig, TransportConfig}; use 
feldera_types::program_schema::Relation; use std::sync::Weak; -use crate::transport::IntegratedInputEndpoint; - #[cfg(feature = "with-deltalake")] mod delta_table; @@ -20,6 +19,9 @@ pub use delta_table::{DeltaTableInputEndpoint, DeltaTableWriter}; #[cfg(feature = "with-deltalake")] use feldera_types::config::TransportConfig::DeltaTableInput; +#[cfg(feature = "with-iceberg")] +use feldera_types::config::TransportConfig::IcebergInput; + /// An integrated output connector implements both transport endpoint /// (`OutputEndpoint`) and `Encoder` traits. It is used to implement /// connectors whose transport protocol and data format are tightly coupled. @@ -92,6 +94,12 @@ pub fn create_integrated_input_endpoint( config, consumer, )), + #[cfg(feature = "with-iceberg")] + IcebergInput(config) => Box::new(feldera_iceberg::IcebergInputEndpoint::new( + endpoint_name, + config, + consumer, + )), transport => { return Err(ControllerError::unknown_input_transport( endpoint_name, diff --git a/crates/adapters/src/test/data.rs b/crates/adapters/src/test/data.rs index e7d27bc0f8..9af3ebbed3 100644 --- a/crates/adapters/src/test/data.rs +++ b/crates/adapters/src/test/data.rs @@ -4,7 +4,7 @@ use arrow::array::{ }; use arrow::datatypes::{DataType, Schema, TimeUnit}; use dbsp::utils::Tup2; -use feldera_sqllib::{Date, Timestamp}; +use feldera_sqllib::{ByteArray, Date, Time, Timestamp, F32, F64}; use feldera_types::program_schema::{ColumnType, Field, Relation, SqlIdentifier}; use feldera_types::{ deserialize_table_record, deserialize_without_context, serialize_struct, serialize_table_record, @@ -12,6 +12,7 @@ use feldera_types::{ use prop::sample::SizeRange; use proptest::{collection, prelude::*}; use proptest_derive::Arbitrary; +use rust_decimal::Decimal; use size_of::SizeOf; use std::collections::BTreeMap; use std::string::ToString; @@ -421,13 +422,7 @@ impl TestStruct2 { let row6_field = Arc::new(arrow::datatypes::Field::new("a", DataType::Boolean, false)); let row6: Vec> = data .iter() 
- .map(|r| { - if let Some(emb_struct) = &r.field_5 { - Some(emb_struct.field) - } else { - None - } - }) + .map(|r| r.field_5.as_ref().map(|emb_struct| emb_struct.field)) .collect(); let row6_booleans = Arc::new(BooleanArray::from(row6)); @@ -564,3 +559,204 @@ deserialize_table_record!(DatabricksPeople["DatabricksPeople", 8] { (r#ssn, "ssn", false, Option, Some(None)), (r#salary, "salary", false, Option, Some(None)) }); + +/// Struct will all types supported by the Iceberg connector. +#[derive( + Debug, + Default, + PartialEq, + Eq, + PartialOrd, + Ord, + serde::Serialize, + serde::Deserialize, + Clone, + Hash, + SizeOf, + rkyv::Archive, + rkyv::Serialize, + rkyv::Deserialize, +)] +#[archive_attr(derive(Ord, Eq, PartialEq, PartialOrd))] +pub struct IcebergTestStruct { + pub b: bool, + pub i: i32, + pub l: i64, + pub r: F32, + pub d: F64, + pub dec: Decimal, + pub dt: Date, + pub tm: Time, + pub ts: Timestamp, + pub s: String, + //pub uuid: ByteArray, + pub fixed: ByteArray, + pub varbin: ByteArray, +} + +impl Arbitrary for IcebergTestStruct { + type Parameters = (); + + type Strategy = BoxedStrategy; + + fn arbitrary_with(_params: Self::Parameters) -> Self::Strategy { + ( + // Split into two tuples with <12 fields each. 
+ ( + bool::arbitrary(), + i32::arbitrary(), + i64::arbitrary(), + f32::arbitrary(), + f64::arbitrary(), + 0..1_000_000i128, + // Scale + 0..3u32, + 0i32..100_000, + // Time in nanos + 0u64..24 * 3600 * 1_000_000_000, + // Generate timestamps within a 1-year range + 1704070800i64..1735693200, + ), + // String in the range "0".."1000" + ( + 0i32..1000, + // // UUID + // collection::vec(u8::arbitrary(), 16..=16), + // Fixed + collection::vec(u8::arbitrary(), 5..=5), + // Varbinary + collection::vec(u8::arbitrary(), 0..=10), + ), + ) + .prop_map( + |( + (b, i, l, r, d, dec_num, dec_scale, dt, tm, ts), + (s, /*uuid,*/ fixed, varbin), + ): ( + (bool, i32, i64, f32, f64, i128, u32, i32, u64, i64), + (i32, /*Vec,*/ Vec, Vec), + )| { + IcebergTestStruct { + b, + i, + l, + r: F32::new(r), + d: F64::new(d), + dec: Decimal::from_i128_with_scale(dec_num, dec_scale), + dt: Date::new(dt), + tm: Time::new(tm), + ts: Timestamp::new(ts), + s: s.to_string(), + // uuid: ByteArray::from_vec(uuid), + fixed: ByteArray::from_vec(fixed), + varbin: ByteArray::new(&varbin), + } + }, + ) + .boxed() + } +} + +impl IcebergTestStruct { + pub fn arrow_schema() -> Arc { + Arc::new(Schema::new(vec![ + arrow::datatypes::Field::new("b", DataType::Boolean, false), + arrow::datatypes::Field::new("i", DataType::Int32, false), + arrow::datatypes::Field::new("l", DataType::Int64, false), + arrow::datatypes::Field::new("r", DataType::Float32, false), + arrow::datatypes::Field::new("d", DataType::Float64, false), + arrow::datatypes::Field::new("dec", DataType::Decimal128(10, 3), false), + arrow::datatypes::Field::new("dt", DataType::Date32, false), + arrow::datatypes::Field::new("tm", DataType::Time64(TimeUnit::Microsecond), false), + arrow::datatypes::Field::new( + "ts", + DataType::Timestamp(TimeUnit::Microsecond, None), + false, + ), + arrow::datatypes::Field::new("s", DataType::Utf8, false), + // arrow::datatypes::Field::new("uuid", DataType::FixedSizeBinary(16), false), + 
arrow::datatypes::Field::new("fixed", DataType::FixedSizeBinary(5), false), + arrow::datatypes::Field::new("varbin", DataType::Binary, false), + ])) + } + + pub fn schema() -> Vec { + vec![ + Field::new("b".into(), ColumnType::boolean(false)), + Field::new("i".into(), ColumnType::int(false)), + Field::new("l".into(), ColumnType::bigint(false)), + Field::new("r".into(), ColumnType::real(false)), + Field::new("d".into(), ColumnType::double(false)), + Field::new("dec".into(), ColumnType::decimal(10, 3, false)), + Field::new("dt".into(), ColumnType::date(false)), + Field::new("tm".into(), ColumnType::time(false)), + Field::new("ts".into(), ColumnType::timestamp(false)), + Field::new("s".into(), ColumnType::varchar(false)), + // Field::new("uuid".into(), ColumnType::fixed(16, false)), + Field::new("fixed".into(), ColumnType::fixed(5, false)), + Field::new("varbin".into(), ColumnType::varbinary(false)), + ] + } + + pub fn schema_with_lateness() -> Vec { + let fields = vec![ + Field::new("b".into(), ColumnType::boolean(false)), + Field::new("i".into(), ColumnType::int(false)), + Field::new("l".into(), ColumnType::bigint(false)), + Field::new("r".into(), ColumnType::real(false)), + Field::new("d".into(), ColumnType::double(false)), + Field::new("dec".into(), ColumnType::decimal(10, 3, false)), + Field::new("dt".into(), ColumnType::date(false)), + Field::new("tm".into(), ColumnType::time(false)), + Field::new("ts".into(), ColumnType::timestamp(false)) + .with_lateness("interval '10 days'"), + Field::new("s".into(), ColumnType::varchar(false)), + // Field::new("uuid".into(), ColumnType::fixed(16, false)), + Field::new("fixed".into(), ColumnType::fixed(5, false)), + Field::new("varbin".into(), ColumnType::varbinary(false)), + ]; + + fields + } + + pub fn relation_schema() -> Relation { + Relation { + name: SqlIdentifier::new("IcebergTestStruct", false), + fields: Self::schema(), + materialized: false, + properties: BTreeMap::new(), + } + } +} + 
+serialize_table_record!(IcebergTestStruct[13]{ + b["b"]: bool, + i["i"]: i32, + l["l"]: i64, + r["r"]: F32, + d["d"]: F64, + dec["dec"]: Decimal, + dt["dt"]: Date, + tm["tm"]: Time, + ts["ts"]: Timestamp, + s["s"]: String, + // uuid["uuid"]: ByteArray, + fixed["fixed"]: ByteArray, + varbin["varbin"]: ByteArray +}); + +deserialize_table_record!(IcebergTestStruct["IcebergTestStruct", 13] { + (b, "b", false, bool, None), + (i, "i", false, i32, None), + (l, "l", false, i64, None), + (r, "r", false, F32, None), + (d, "d", false, F64, None), + (dec, "dec", false, Decimal, None), + (dt, "dt", false, Date, None), + (tm, "tm", false, Time, None), + (ts, "ts", false, Timestamp, None), + (s, "s", false, String, None), + // (uuid, "uuid", false, ByteArray, None), + (fixed, "fixed", false, ByteArray, None), + (varbin, "varbin", false, ByteArray, None) +}); diff --git a/crates/adapters/src/test/iceberg.rs b/crates/adapters/src/test/iceberg.rs new file mode 100644 index 0000000000..5f33cb5adf --- /dev/null +++ b/crates/adapters/src/test/iceberg.rs @@ -0,0 +1,380 @@ +//! See crates/iceberg/src/test/README.md for a description of the Iceberg test harness. 
+ +use crate::{ + test::{file_to_zset, wait}, + Controller, +}; +use crossbeam::channel::Receiver; +use dbsp::DBData; +use feldera_sqllib::{ByteArray, F32, F64}; +use feldera_types::{ + config::PipelineConfig, + program_schema::Field, + serde_with_context::{DeserializeWithContext, SerializeWithContext, SqlSerdeConfig}, +}; +use rust_decimal::Decimal; +use std::{collections::HashMap, time::Instant}; +use tempfile::NamedTempFile; +use tracing::info; +use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter}; + +#[cfg(feature = "iceberg-tests-fs")] +use std::io::Write; + +use super::{test_circuit, IcebergTestStruct}; + +fn init_logging() { + let _ = tracing_subscriber::registry() + .with(tracing_subscriber::fmt::layer().with_test_writer()) + .with( + EnvFilter::try_from_default_env() + .or_else(|_| EnvFilter::try_new("info")) + .unwrap(), + ) + .try_init(); +} + +#[cfg(feature = "iceberg-tests-fs")] +/// Store test dataset in an ndjson file +fn data_to_ndjson(data: Vec) -> NamedTempFile { + println!("delta_table_output_test: preparing input file"); + let mut file = NamedTempFile::new().unwrap(); + for v in data.iter() { + let buffer: Vec = Vec::new(); + let mut serializer = serde_json::Serializer::new(buffer); + v.serialize_with_context(&mut serializer, &SqlSerdeConfig::default()) + .unwrap(); + file.as_file_mut() + .write_all(&serializer.into_inner()) + .unwrap(); + file.write_all(b"\n").unwrap(); + } + + file +} + +/// Read a snapshot of an Iceberg table with records of type `T` to a temporary JSON file. 
+fn iceberg_snapshot_to_json(schema: &[Field], config: &HashMap) -> NamedTempFile +where + T: DBData + + SerializeWithContext + + for<'de> DeserializeWithContext<'de, SqlSerdeConfig> + + Sync, +{ + let start = Instant::now(); + let json_file = NamedTempFile::new().unwrap(); + println!( + "iceberg_snapshot_to_json: writing output to {}", + json_file.path().display() + ); + + let mut config = config.clone(); + config.insert("mode".to_string(), "snapshot".to_string()); + + let (input_pipeline, err_receiver) = + iceberg_input_pipeline::(schema, &config, &json_file.path().display().to_string()); + input_pipeline.start(); + wait( + || input_pipeline.status().pipeline_complete() || err_receiver.len() > 0, + 400_000, + ) + .expect("timeout"); + + assert!(err_receiver.is_empty()); + + input_pipeline.stop().unwrap(); + + info!("Read Iceberg snapshot in {:?}", start.elapsed()); + + json_file +} + +/// Build a pipeline that reads from an Iceberg table and writes to a JSON file. +fn iceberg_input_pipeline( + schema: &[Field], + config: &HashMap, + output_file_path: &str, +) -> (Controller, Receiver) +where + T: DBData + + SerializeWithContext + + for<'de> DeserializeWithContext<'de, SqlSerdeConfig> + + Sync, +{ + init_logging(); + + let mut options = String::new(); + for (key, val) in config.iter() { + options += &format!(" {key}: \"{val}\"\n"); + } + + // Create controller. 
+ let config_str = format!( + r#" +name: test +workers: 4 +outputs: + test_output1: + stream: test_output1 + transport: + name: file_output + config: + path: "{output_file_path}" + format: + name: json + config: + update_format: "insert_delete" +inputs: + test_input1: + stream: test_input1 + transport: + name: "iceberg_input" + config: +{} +"#, + options, + ); + + let config: PipelineConfig = serde_yaml::from_str(&config_str).unwrap(); + let schema = schema.to_vec(); + + let (err_sender, err_receiver) = crossbeam::channel::unbounded(); + + let controller = Controller::with_config( + move |workers| Ok(test_circuit::(workers, &schema)), + &config, + Box::new(move |e| { + let msg = format!("iceberg_input_test: error: {e}"); + println!("{}", msg); + err_sender.send(msg).unwrap() + }), + ) + .unwrap(); + + (controller, err_receiver) +} + +/// Generate `n_records` _unique_ records. +#[cfg(feature = "iceberg-tests-fs")] +fn data(n_records: usize) -> Vec { + let mut result = Vec::with_capacity(n_records); + + let mut time = + chrono::NaiveDateTime::parse_from_str("2024-01-01 00:00:00", "%Y-%m-%d %H:%M:%S").unwrap(); + + for i in 0..n_records { + result.push(IcebergTestStruct { + b: i % 2 != 0, + i: i as i32, + l: i as i64, + r: F32::from(i as f32), + d: F64::from(i as f64), + dec: Decimal::new(i as i64, 2), + dt: feldera_sqllib::Date::from_date(time.date()), + tm: feldera_sqllib::Time::from_time(time.time()), + ts: feldera_sqllib::Timestamp::from_naiveDateTime(time), + s: format!("s{i}"), + // uuid: ByteArray::new([0u8; 16].as_slice()), + fixed: ByteArray::new([0u8; 5].as_slice()), + varbin: ByteArray::new([0u8; 5].as_slice()), + }); + + time += std::time::Duration::from_secs(1); + } + + result +} + +#[test] +#[cfg(feature = "iceberg-tests-fs")] +fn iceberg_localfs_input_test_unordered() { + iceberg_localfs_input_test(&[], &|_| true); +} + +#[test] +#[cfg(feature = "iceberg-tests-fs")] +fn iceberg_localfs_input_test_ordered() { + iceberg_localfs_input_test( + 
&[("timestamp_column".to_string(), "ts".to_string())], + &|_| true, + ); +} + +#[test] +#[cfg(feature = "iceberg-tests-fs")] +fn iceberg_localfs_input_test_ordered_with_filter() { + iceberg_localfs_input_test( + &[ + ("timestamp_column".to_string(), "ts".to_string()), + ("snapshot_filter".to_string(), "i >= 10000".to_string()), + ], + &|x| x.i >= 10000, + ); +} + +#[cfg(feature = "iceberg-tests-fs")] +fn iceberg_localfs_input_test( + extra_config: &[(String, String)], + filter: &dyn Fn(&IcebergTestStruct) -> bool, +) { + let data = data(1_000_000); + + let table_dir = tempfile::TempDir::new().unwrap(); + let table_path = table_dir.path().display().to_string(); + + let ndjson_file = data_to_ndjson(data.clone()); + println!("wrote test data to {}", ndjson_file.path().display()); + + // Uncomment to inspect output parquet files produced by the test. + std::mem::forget(table_dir); + + let script_path = "../iceberg/src/test/create_test_table_s3.py"; + + // Run the Python script using the Python interpreter + let output = std::process::Command::new("python3") + .arg(script_path) + .arg("--catalog=sql") + .arg(format!("--warehouse-path={table_path}")) + .arg(format!("--json-file={}", ndjson_file.path().display())) + .output() + .map_err(|e| { + format!("Error running '{script_path}' script to generate an Iceberg table: {e}") + }) + .unwrap(); + + if !output.status.success() { + panic!( + "'{script_path}' failed (status: {}), stdout:{}\nstderr:{}", + output.status, + &String::from_utf8(output.stdout).unwrap(), + &String::from_utf8(output.stderr).unwrap() + ); + } + + // The script should print table metadata location on the last line. 
+ let metadata_path = String::from_utf8(output.stdout.clone()) + .unwrap() + .lines() + .last() + .unwrap() + .to_string(); + + let mut json_file = iceberg_snapshot_to_json::( + &IcebergTestStruct::schema_with_lateness(), + &[("metadata_location".to_string(), metadata_path.to_string())] + .into_iter() + .chain(extra_config.into_iter().cloned()) + .collect::>(), + ); + + let expected_zset = dbsp::OrdZSet::from_tuples( + (), + data.clone() + .into_iter() + .filter(filter) + .map(|x| dbsp::utils::Tup2(dbsp::utils::Tup2(x, ()), 1)) + .collect(), + ); + let zset = file_to_zset::( + json_file.as_file_mut(), + "json", + r#"update_format: "insert_delete""#, + ); + + assert_eq!(zset, expected_zset); +} + +#[test] +#[cfg(feature = "iceberg-tests-glue")] +fn iceberg_glue_s3_input_test() { + use dbsp::trace::BatchReader; + + // Read Iceberg table unordered. + let mut json_file = iceberg_snapshot_to_json::( + &IcebergTestStruct::schema_with_lateness(), + &[ + ("catalog_type".to_string(), "glue".to_string()), + ( + "glue.warehouse".to_string(), + "s3://feldera-iceberg-test/".to_string(), + ), + ( + "table_name".to_string(), + "iceberg_test.test_table".to_string(), + ), + ( + "glue.access-key-id".to_string(), + std::env::var("ICEBERG_TEST_AWS_ACCESS_KEY_ID").unwrap(), + ), + ( + "glue.secret-access-key".to_string(), + std::env::var("ICEBERG_TEST_AWS_SECRET_ACCESS_KEY").unwrap(), + ), + ("glue.region".to_string(), "us-east-1".to_string()), + ( + "s3.access-key-id".to_string(), + std::env::var("ICEBERG_TEST_AWS_ACCESS_KEY_ID").unwrap(), + ), + ( + "s3.secret-access-key".to_string(), + std::env::var("ICEBERG_TEST_AWS_SECRET_ACCESS_KEY").unwrap(), + ), + ("s3.region".to_string(), "us-east-1".to_string()), + ] + .into_iter() + .collect::>(), + ); + + let zset = file_to_zset::( + json_file.as_file_mut(), + "json", + r#"update_format: "insert_delete""#, + ); + + // The data for this test is generated by the Python script, we don't know the + // exact set of records in the dataset. 
+ assert_eq!(zset.len(), 2000000); +} + +#[test] +#[cfg(feature = "iceberg-tests-rest")] +fn iceberg_rest_s3_input_test() { + use dbsp::trace::BatchReader; + + // Read Iceberg table unordered. + let mut json_file = iceberg_snapshot_to_json::( + &IcebergTestStruct::schema_with_lateness(), + &[ + ("catalog_type".to_string(), "rest".to_string()), + ("rest.uri".to_string(), "http://localhost:8181".to_string()), + ( + "rest.warehouse".to_string(), + "s3://feldera-iceberg-test/".to_string(), + ), + ( + "table_name".to_string(), + "iceberg_test.test_table".to_string(), + ), + ( + "s3.access-key-id".to_string(), + std::env::var("ICEBERG_TEST_AWS_ACCESS_KEY_ID").unwrap(), + ), + ( + "s3.secret-access-key".to_string(), + std::env::var("ICEBERG_TEST_AWS_SECRET_ACCESS_KEY").unwrap(), + ), + ("s3.region".to_string(), "us-east-1".to_string()), + ] + .into_iter() + .collect::>(), + ); + + let zset = file_to_zset::( + json_file.as_file_mut(), + "json", + r#"update_format: "insert_delete""#, + ); + + assert_eq!(zset.len(), 2000000); + //assert_eq!(zset, expected_zset); +} diff --git a/crates/adapters/src/test/mod.rs b/crates/adapters/src/test/mod.rs index 417b4f597e..d88130f959 100644 --- a/crates/adapters/src/test/mod.rs +++ b/crates/adapters/src/test/mod.rs @@ -39,12 +39,22 @@ mod mock_output_consumer; mod datagen; +#[cfg(all( + feature = "with-iceberg", + any( + feature = "iceberg-tests-fs", + feature = "iceberg-tests-glue", + feature = "iceberg-tests-rest" + ) +))] +mod iceberg; + use crate::catalog::InputCollectionHandle; use crate::format::get_input_format; use crate::transport::input_transport_config_to_endpoint; pub use data::{ generate_test_batch, generate_test_batches, generate_test_batches_with_weights, - DatabricksPeople, EmbeddedStruct, TestStruct, TestStruct2, + DatabricksPeople, EmbeddedStruct, IcebergTestStruct, TestStruct, TestStruct2, }; use dbsp::circuit::CircuitConfig; use dbsp::utils::Tup2; @@ -166,7 +176,6 @@ where /// Create a simple test circuit that passes 
the input stream right through to /// the output. // TODO: parameterize with the number (and types?) of input and output streams. - pub fn test_circuit( config: CircuitConfig, schema: &[Field], diff --git a/crates/adapters/src/transport/mod.rs b/crates/adapters/src/transport/mod.rs index 5ef00cad3f..af28ff829c 100644 --- a/crates/adapters/src/transport/mod.rs +++ b/crates/adapters/src/transport/mod.rs @@ -98,7 +98,8 @@ pub fn input_transport_config_to_endpoint( | TransportConfig::KafkaOutput(_) | TransportConfig::DeltaTableInput(_) | TransportConfig::DeltaTableOutput(_) - | TransportConfig::HttpOutput => return Ok(None), + | TransportConfig::HttpOutput + | TransportConfig::IcebergInput(_) => return Ok(None), }; if fault_tolerant && !endpoint.is_fault_tolerant() { return Err(anyhow!( diff --git a/crates/feldera-types/src/config.rs b/crates/feldera-types/src/config.rs index 7976b7d3f2..8572d1034c 100644 --- a/crates/feldera-types/src/config.rs +++ b/crates/feldera-types/src/config.rs @@ -10,6 +10,7 @@ use crate::transport::datagen::DatagenInputConfig; use crate::transport::delta_table::{DeltaTableReaderConfig, DeltaTableWriterConfig}; use crate::transport::file::{FileInputConfig, FileOutputConfig}; use crate::transport::http::HttpInputConfig; +use crate::transport::iceberg::IcebergReaderConfig; use crate::transport::kafka::{KafkaInputConfig, KafkaOutputConfig}; use crate::transport::nexmark::NexmarkInputConfig; use crate::transport::pubsub::PubSubInputConfig; @@ -494,6 +495,8 @@ pub enum TransportConfig { S3Input(S3InputConfig), DeltaTableInput(DeltaTableReaderConfig), DeltaTableOutput(DeltaTableWriterConfig), + // Prevent rust from complaining about large size difference between enum variants. 
+ IcebergInput(Box), Datagen(DatagenInputConfig), Nexmark(NexmarkInputConfig), /// Direct HTTP input: cannot be instantiated through API @@ -516,6 +519,7 @@ impl TransportConfig { TransportConfig::S3Input(_) => "s3_input".to_string(), TransportConfig::DeltaTableInput(_) => "delta_table_input".to_string(), TransportConfig::DeltaTableOutput(_) => "delta_table_output".to_string(), + TransportConfig::IcebergInput(_) => "iceberg_input".to_string(), TransportConfig::Datagen(_) => "datagen".to_string(), TransportConfig::Nexmark(_) => "nexmark".to_string(), TransportConfig::HttpInput(_) => "http_input".to_string(), diff --git a/crates/feldera-types/src/program_schema.rs b/crates/feldera-types/src/program_schema.rs index c327277f99..7af882906d 100644 --- a/crates/feldera-types/src/program_schema.rs +++ b/crates/feldera-types/src/program_schema.rs @@ -701,6 +701,19 @@ impl ColumnType { } } + pub fn decimal(precision: i64, scale: i64, nullable: bool) -> Self { + ColumnType { + typ: SqlType::Decimal, + nullable, + precision: Some(precision), + scale: Some(scale), + component: None, + fields: None, + key: None, + value: None, + } + } + pub fn varchar(nullable: bool) -> Self { ColumnType { typ: SqlType::Varchar, @@ -727,6 +740,19 @@ impl ColumnType { } } + pub fn fixed(width: i64, nullable: bool) -> Self { + ColumnType { + typ: SqlType::Binary, + nullable, + precision: Some(width), + scale: None, + component: None, + fields: None, + key: None, + value: None, + } + } + pub fn date(nullable: bool) -> Self { ColumnType { typ: SqlType::Date, diff --git a/crates/feldera-types/src/transport/delta_table.rs b/crates/feldera-types/src/transport/delta_table.rs index cc8457320b..c9cd8d155e 100644 --- a/crates/feldera-types/src/transport/delta_table.rs +++ b/crates/feldera-types/src/transport/delta_table.rs @@ -70,7 +70,7 @@ pub enum DeltaTableIngestMode { SnapshotAndFollow, } -/// Delta table output connector configuration. +/// Delta table input connector configuration. 
#[derive(Debug, Clone, Eq, PartialEq, Deserialize, Serialize, ToSchema)] pub struct DeltaTableReaderConfig { /// Table URI. @@ -132,13 +132,13 @@ pub struct DeltaTableReaderConfig { pub version: Option, /// Optional timestamp for the snapshot in the ISO-8601/RFC-3339 format, e.g., - /// "2024-12-09T16:09:53+00:00. + /// "2024-12-09T16:09:53+00:00". /// /// When this option is set, the connector finds and opens the version of the table as of the - /// specified point in time. In `snapshot` and `snapshot_and_follow` modes, it retrieves the - /// snapshot of this version of the table (based on the server time recorded in the transaction - /// log, not the event time encoded in the data). In `follow` and `snapshot_and_follow` modes, it - /// follows transaction log records **after** this version. + /// specified point in time (based on the server time recorded in the transaction log, not the + /// event time encoded in the data). In `snapshot` and `snapshot_and_follow` modes, it + /// retrieves the snapshot of this version of the table. In `follow` and `snapshot_and_follow` + /// modes, it follows transaction log records **after** this version. /// /// Note: at most one of `version` and `datetime` options can be specified. /// When neither of the two options is specified, the latest committed version of the table diff --git a/crates/feldera-types/src/transport/iceberg.rs b/crates/feldera-types/src/transport/iceberg.rs new file mode 100644 index 0000000000..59d9923900 --- /dev/null +++ b/crates/feldera-types/src/transport/iceberg.rs @@ -0,0 +1,356 @@ +use serde::{Deserialize, Serialize}; +use std::{collections::HashMap, fmt::Display}; +use utoipa::ToSchema; + +/// Iceberg table read mode. +/// +/// Three options are available: +/// +/// * `snapshot` - read a snapshot of the table and stop. +/// +/// * `follow` - continuously ingest changes to the table, starting from a specified snapshot +/// or timestamp. 
+/// +/// * `snapshot_and_follow` - read a snapshot of the table before switching to continuous ingestion +/// mode. +#[derive(Debug, Clone, Eq, PartialEq, Deserialize, Serialize, ToSchema)] +pub enum IcebergIngestMode { + /// Read a snapshot of the table and stop. + #[serde(rename = "snapshot")] + Snapshot, + + /// Follow the changelog of the table, only ingesting changes (new and deleted rows). + #[serde(rename = "follow")] + Follow, + + /// Take a snapshot of the table before switching to the `follow` mode. + #[serde(rename = "snapshot_and_follow")] + SnapshotAndFollow, +} + +impl Display for IcebergIngestMode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + IcebergIngestMode::Snapshot => f.write_str("snapshot"), + IcebergIngestMode::Follow => f.write_str("follow"), + IcebergIngestMode::SnapshotAndFollow => f.write_str("snapshot_and_follow"), + } + } +} + +#[derive(Debug, Clone, Eq, PartialEq, Deserialize, Serialize, ToSchema)] +pub enum IcebergCatalogType { + #[serde(rename = "rest")] + Rest, + #[serde(rename = "glue")] + Glue, +} + +/// AWS Glue catalog config. +#[derive(Debug, Clone, Eq, PartialEq, Deserialize, Serialize, ToSchema)] +pub struct GlueCatalogConfig { + /// Location for table metadata. + /// + /// Example: `"s3://my-data-warehouse/tables/"` + #[serde(rename = "glue.warehouse")] + pub warehouse: Option, + + /// Configure an alternative endpoint of the Glue service for Glue catalog to access. + /// + /// Example: `"https://glue.us-east-1.amazonaws.com"` + #[serde(rename = "glue.endpoint")] + pub endpoint: Option, + + /// Access key id used to access the Glue catalog. + #[serde(rename = "glue.access-key-id")] + pub access_key_id: Option, + + /// Secret access key used to access the Glue catalog. + #[serde(rename = "glue.secret-access-key")] + pub secret_access_key: Option, + + /// Profile used to access the Glue catalog. 
+ #[serde(rename = "glue.profile-name")] + pub profile_name: Option, + + /// Region of the Glue catalog. + #[serde(rename = "glue.region")] + pub region: Option, + + /// Static session token used to access the Glue catalog. + #[serde(rename = "glue.session-token")] + pub session_token: Option, + + /// The 12-digit ID of the Glue catalog. + #[serde(rename = "glue.id")] + pub id: Option, +} + +/// Iceberg REST catalog config. +#[derive(Debug, Clone, Eq, PartialEq, Deserialize, Serialize, ToSchema)] +pub struct RestCatalogConfig { + /// URI identifying the REST catalog server. + #[serde(rename = "rest.uri")] + pub uri: Option, + + /// The default location for managed tables created by the catalog. + #[serde(rename = "rest.warehouse")] + pub warehouse: Option, + + /// Authentication URL to use for client credentials authentication (default: uri + 'v1/oauth/tokens') + #[serde(rename = "rest.oauth2-server-uri")] + pub oauth2_server_uri: Option, + + /// Credential to use for OAuth2 credential flow when initializing the catalog. + /// + /// A key and secret pair separated by ":" (key is optional). + #[serde(rename = "rest.credential")] + pub credential: Option, + + /// Bearer token value to use for `Authorization` header. + #[serde(rename = "rest.token")] + pub token: Option, + + /// Desired scope of the requested security token (default: catalog). + #[serde(rename = "rest.scope")] + pub scope: Option, + + /// Customize table storage paths. + /// + /// When combined with the `warehouse` property, the prefix determines + /// how table data is organized within the storage. + #[serde(rename = "rest.prefix")] + pub prefix: Option, + + /// Additional HTTP request headers added to each catalog REST API call. + #[serde(default)] + #[serde(rename = "rest.headers")] + pub headers: Option>, + + /// Logical name of target resource or service. + #[serde(rename = "rest.audience")] + pub audience: Option, + + /// URI for the target resource or service. 
+ #[serde(rename = "rest.resource")] + pub resource: Option, +} + +/// Iceberg input connector configuration. +#[derive(Debug, Clone, Eq, PartialEq, Deserialize, Serialize, ToSchema)] +pub struct IcebergReaderConfig { + /// Table read mode. + pub mode: IcebergIngestMode, + + /// Table column that serves as an event timestamp. + /// + /// When this option is specified, and `mode` is one of `snapshot` or `snapshot_and_follow`, + /// table rows are ingested in the timestamp order, respecting the + /// [`LATENESS`](https://docs.feldera.com/sql/streaming#lateness-expressions) + /// property of the column: each ingested row has a timestamp no more than `LATENESS` + /// time units earlier than the most recent timestamp of any previously ingested row. + /// The ingestion is performed by partitioning the table into timestamp ranges of width + /// `LATENESS`. Each range is processed sequentially, in increasing timestamp order. + /// + /// # Example + /// + /// Consider a table with timestamp column of type `TIMESTAMP` and lateness attribute + /// `INTERVAL 1 DAY`. Assuming that the oldest timestamp in the table is + /// `2024-01-01T00:00:00`, the connector will fetch all records with timestamps + /// from `2024-01-01`, then all records for `2024-01-02`, `2024-01-03`, etc., until all records + /// in the table have been ingested. + /// + /// # Requirements + /// + /// * The timestamp column must be of a supported type: integer, `DATE`, or `TIMESTAMP`. + /// * The timestamp column must be declared with non-zero `LATENESS`. + /// * For efficient ingest, the table must be optimized for timestamp-based + /// queries using partitioning, Z-ordering, or liquid clustering. + pub timestamp_column: Option, + + /// Optional row filter. + /// + /// This option is only valid when `mode` is set to `snapshot` or `snapshot_and_follow`. + /// + /// When specified, only rows that satisfy the filter condition are included in the + /// snapshot. 
The condition must be a valid SQL Boolean expression that can be used in + /// the `where` clause of the `select * from snapshot where ...` query. + /// + /// This option can be used to specify the range of event times to include in the snapshot, + /// e.g.: `ts BETWEEN '2005-01-01 00:00:00' AND '2010-12-31 23:59:59'`. + pub snapshot_filter: Option, + + /// Optional snapshot id. + /// + /// When this option is set, the connector finds the specified snapshot of the table. + /// In `snapshot` and `snapshot_and_follow` modes, it loads this snapshot. + /// In `follow` and `snapshot_and_follow` modes, it follows table updates + /// **after** this snapshot. + /// + /// Note: at most one of `snapshot_id` and `datetime` options can be specified. + /// When neither of the two options is specified, the latest committed version of the table + /// is used. + pub snapshot_id: Option, + + /// Optional timestamp for the snapshot in the ISO-8601/RFC-3339 format, e.g., + /// "2024-12-09T16:09:53+00:00". + /// + /// When this option is set, the connector finds and opens the snapshot of the table as of the + /// specified point in time (based on the server time recorded in the transaction + /// log, not the event time encoded in the data). In `snapshot` and `snapshot_and_follow` + /// modes, it retrieves this snapshot. In `follow` and `snapshot_and_follow` modes, it + /// follows transaction log records **after** this snapshot. + /// + /// Note: at most one of `snapshot_id` and `datetime` options can be specified. + /// When neither of the two options is specified, the latest committed version of the table + /// is used. + pub datetime: Option, + + /// Location of the table metadata JSON file. + /// + /// This property is used to access an Iceberg table without a catalog. It is mutually + /// exclusive with the `catalog_type` property. + pub metadata_location: Option, + + /// Specifies the Iceberg table name in the "namespace.table" format. 
+ /// + /// This option is applicable when an Iceberg catalog is configured using the `catalog_type` property. + pub table_name: Option, + + /// Specifies the catalog type used to access the Iceberg table. + /// + /// Supported options include "rest" and "glue". This property is mutually exclusive with `metadata_location`. + pub catalog_type: Option, + + #[serde(flatten)] + pub glue_catalog_config: GlueCatalogConfig, + + #[serde(flatten)] + pub rest_catalog_config: RestCatalogConfig, + + /// Storage options for configuring backend object store. + /// + /// See the [list of available options in PyIceberg documentation](https://py.iceberg.apache.org/configuration/#fileio). + #[serde(flatten)] + pub fileio_config: HashMap, +} + +impl IcebergReaderConfig { + pub fn validate_catalog_config(&self) -> Result<(), String> { + self.validate_metadata_location()?; + self.validate_table_name()?; + self.validate_glue_catalog_config()?; + self.validate_rest_catalog_config()?; + + Ok(()) + } + + /// Reject Glue catalog config properties when 'catalog_type' isn't set to 'glue'. 
+ pub fn validate_glue_catalog_config(&self) -> Result<(), String> { + if self.catalog_type == Some(IcebergCatalogType::Glue) { + if self.glue_catalog_config.warehouse.is_none() { + return Err(r#"missing Iceberg warehouse location—set the 'glue.warehouse' property to the location of the Iceberg tables managed by the catalog (e.g., 's3://my-data-warehouse/tables/') when using "catalog_type" = "glue""#.to_string()); + } + } else { + ensure_glue_property_not_set(&self.glue_catalog_config.warehouse, "warehouse")?; + ensure_glue_property_not_set(&self.glue_catalog_config.endpoint, "endpoint")?; + ensure_glue_property_not_set(&self.glue_catalog_config.access_key_id, "access-key-id")?; + ensure_glue_property_not_set( + &self.glue_catalog_config.secret_access_key, + "secret-access-key", + )?; + ensure_glue_property_not_set(&self.glue_catalog_config.profile_name, "profile-name")?; + ensure_glue_property_not_set(&self.glue_catalog_config.region, "region")?; + ensure_glue_property_not_set(&self.glue_catalog_config.session_token, "session-token")?; + ensure_glue_property_not_set(&self.glue_catalog_config.id, "id")?; + } + + Ok(()) + } + + /// Reject Rest catalog config when 'catalog_type' isn't set to 'rest'. 
+ pub fn validate_rest_catalog_config(&self) -> Result<(), String> { + if self.catalog_type == Some(IcebergCatalogType::Rest) { + if self.rest_catalog_config.uri.is_none() { + return Err(r#"missing Iceberg Rest catalog URI—set the 'rest.uri' property when using "catalog_type" = "rest""#.to_string()); + } + } else { + ensure_rest_property_not_set(&self.rest_catalog_config.uri, "uri")?; + ensure_rest_property_not_set(&self.rest_catalog_config.warehouse, "warehouse")?; + ensure_rest_property_not_set( + &self.rest_catalog_config.oauth2_server_uri, + "oauth2-server-uri", + )?; + ensure_rest_property_not_set(&self.rest_catalog_config.credential, "credential")?; + ensure_rest_property_not_set(&self.rest_catalog_config.token, "token")?; + ensure_rest_property_not_set(&self.rest_catalog_config.scope, "scope")?; + ensure_rest_property_not_set(&self.rest_catalog_config.prefix, "prefix")?; + ensure_rest_property_not_set(&self.rest_catalog_config.headers, "headers")?; + ensure_rest_property_not_set(&self.rest_catalog_config.audience, "audience")?; + ensure_rest_property_not_set(&self.rest_catalog_config.resource, "resource")?; + } + + Ok(()) + } + + /// Table name must be configured iff 'catalog_type' is set. + pub fn validate_table_name(&self) -> Result<(), String> { + if self.catalog_type.is_none() && self.table_name.is_some() { + Err("unexpected 'table_name' property: the 'table_name' property is valid only when an Iceberg catalog is configured using 'catalog_type'".to_string()) + } else if self.catalog_type.is_some() && self.table_name.is_none() { + Err("missing 'table_name' property—'table_name' must be specified when Iceberg catalog is configured using 'catalog_type'".to_string()) + } else { + Ok(()) + } + } + + /// 'metadata_location' must be configured iff 'catalog_type' is not set. 
+ pub fn validate_metadata_location(&self) -> Result<(), String> { + if self.catalog_type.is_none() && self.metadata_location.is_none() { + Err("missing metadata location: you must either specify an Iceberg catalog configuration by setting the 'catalog_type' property or provide a table metadata location directly via the 'metadata_location' property".to_string()) + } else if self.catalog_type.is_some() && self.metadata_location.is_some() { + Err("unexpected 'metadata_location' property: the 'metadata_location' property is not supported when an Iceberg catalog is configured using 'catalog_type'".to_string()) + } else { + Ok(()) + } + } +} + +fn ensure_glue_property_not_set(property: &Option, name: &str) -> Result<(), String> { + if property.is_some() { + Err(format!( + r#"unexpected 'glue.{name}' property—Glue catalog configuration properties are only valid when "catalog_type" = "glue""# + )) + } else { + Ok(()) + } +} + +fn ensure_rest_property_not_set(property: &Option, name: &str) -> Result<(), String> { + if property.is_some() { + Err(format!( + r#"unexpected 'rest.{name}' property—Rest catalog configuration properties are only valid when "catalog_type" = "rest""# + )) + } else { + Ok(()) + } +} + +impl IcebergReaderConfig { + /// `true` if the configuration requires taking an initial snapshot of the table. 
+ pub fn snapshot(&self) -> bool { + matches!( + &self.mode, + IcebergIngestMode::Snapshot | IcebergIngestMode::SnapshotAndFollow + ) + } + + /// `true` if the configuration requires following the transaction log of the table + /// (possibly after taking an initial snapshot).s + pub fn follow(&self) -> bool { + matches!( + &self.mode, + IcebergIngestMode::SnapshotAndFollow | IcebergIngestMode::Follow + ) + } +} diff --git a/crates/feldera-types/src/transport/mod.rs b/crates/feldera-types/src/transport/mod.rs index 1d0d9d9a40..2a7961443d 100644 --- a/crates/feldera-types/src/transport/mod.rs +++ b/crates/feldera-types/src/transport/mod.rs @@ -3,6 +3,7 @@ pub mod datagen; pub mod delta_table; pub mod file; pub mod http; +pub mod iceberg; pub mod kafka; pub mod nexmark; pub mod pubsub; diff --git a/crates/iceberg/Cargo.toml b/crates/iceberg/Cargo.toml new file mode 100644 index 0000000000..d515bed0b9 --- /dev/null +++ b/crates/iceberg/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "feldera-iceberg" +version = "0.1.0" +edition = "2021" + +[dependencies] +feldera-types = { path = "../feldera-types" } +feldera-adapterlib = { path = "../adapterlib" } +dbsp = { path = "../dbsp" } +anyhow = { version = "1.0.57", features = ["backtrace"] } +tokio = { version = "1.25.0", features = ["sync", "rt"] } +datafusion = { version = "43" } +log = "0.4.20" +iceberg = { git = "https://github.com/apache/iceberg-rust.git", rev = "2e0b646" } +iceberg-datafusion = { git = "https://github.com/apache/iceberg-rust.git", rev = "2e0b646" } +iceberg-catalog-glue = { git = "https://github.com/apache/iceberg-rust.git", rev = "2e0b646" } +iceberg-catalog-rest = { git = "https://github.com/apache/iceberg-rust.git", rev = "2e0b646" } +chrono = { version = "0.4.38" } +serde_json = { version = "1.0.127" } +futures-util = "0.3.30" diff --git a/crates/iceberg/src/input.rs b/crates/iceberg/src/input.rs new file mode 100644 index 0000000000..8231dafcce --- /dev/null +++ b/crates/iceberg/src/input.rs @@ -0,0 
+1,860 @@ +use crate::iceberg_input_serde_config; +use anyhow::{anyhow, bail, Error as AnyError, Result as AnyResult}; +use chrono::{DateTime, Utc}; +use datafusion::{ + arrow::array::AsArray, + prelude::{DataFrame, SQLOptions, SessionContext}, +}; +use dbsp::circuit::tokio::TOKIO; +use feldera_adapterlib::{ + catalog::{ArrowStream, InputCollectionHandle}, + errors::metadata::ControllerError, + format::ParseError, + transport::{ + InputConsumer, InputEndpoint, InputQueue, InputReader, InputReaderCommand, + IntegratedInputEndpoint, NonFtInputReaderCommand, + }, + utils::datafusion::{ + execute_query_collect, execute_singleton_query, timestamp_to_sql_expression, + validate_sql_expression, validate_timestamp_column, + }, + PipelineState, +}; +use feldera_types::{ + program_schema::Relation, + transport::iceberg::{IcebergCatalogType, IcebergReaderConfig}, +}; +use futures_util::StreamExt; +use iceberg::{io::FileIO, spec::TableMetadata, table::Table as IcebergTable, Catalog, TableIdent}; +use iceberg_catalog_glue::{ + GlueCatalog, GlueCatalogConfig, AWS_ACCESS_KEY_ID, AWS_PROFILE_NAME, AWS_REGION_NAME, + AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN, +}; +use iceberg_catalog_rest::{RestCatalog, RestCatalogConfig}; +use iceberg_datafusion::IcebergTableProvider; +use log::{debug, info, trace}; +use std::sync::Arc; +use tokio::{ + select, + sync::{ + mpsc, + watch::{channel, Receiver, Sender}, + }, +}; + +enum SnapshotDescr { + /// Open the latest snapshot (default) + Latest, + /// Open specific snapshot id. + SnapshotId(i64), + /// Open + Timestamp(DateTime), +} + +/// Integrated input connector that reads from an Iceberg table. 
+pub struct IcebergInputEndpoint { + inner: Arc, +} + +impl IcebergInputEndpoint { + pub fn new( + endpoint_name: &str, + config: &IcebergReaderConfig, + consumer: Box, + ) -> Self { + Self { + inner: Arc::new(IcebergInputEndpointInner::new( + endpoint_name, + config.clone(), + consumer, + )), + } + } +} + +impl InputEndpoint for IcebergInputEndpoint { + fn is_fault_tolerant(&self) -> bool { + false + } +} + +impl IntegratedInputEndpoint for IcebergInputEndpoint { + fn open(&self, input_handle: &InputCollectionHandle) -> AnyResult> { + Ok(Box::new(IcebergInputReader::new( + &self.inner, + input_handle, + )?)) + } +} + +struct IcebergInputReader { + sender: Sender, + inner: Arc, +} + +impl IcebergInputReader { + fn new( + endpoint: &Arc, + input_handle: &InputCollectionHandle, + ) -> AnyResult { + // TODO: perform validation as part of config deserialization. + endpoint + .config + .validate_catalog_config() + .map_err(|e| anyhow!(e))?; + + if endpoint.config.follow() { + bail!("'{}' mode is not yet supported", endpoint.config.mode); + } + + let (sender, receiver) = channel(PipelineState::Paused); + let endpoint_clone = endpoint.clone(); + let receiver_clone = receiver.clone(); + + // Used to communicate the status of connector initialization. 
+ let (init_status_sender, mut init_status_receiver) = + mpsc::channel::>(1); + + let input_stream = input_handle + .handle + .configure_arrow_deserializer(iceberg_input_serde_config())?; + let schema = input_handle.schema.clone(); + + std::thread::spawn(move || { + TOKIO.block_on(async { + let _ = endpoint_clone + .worker_task(input_stream, schema, receiver_clone, init_status_sender) + .await; + }) + }); + + init_status_receiver.blocking_recv().ok_or_else(|| { + anyhow!("worker thread terminated unexpectedly during initialization") + })??; + + Ok(Self { + sender, + inner: endpoint.clone(), + }) + } +} + +impl InputReader for IcebergInputReader { + fn request(&self, command: InputReaderCommand) { + match command.as_nonft().unwrap() { + NonFtInputReaderCommand::Queue => self.inner.queue.queue(), + NonFtInputReaderCommand::Transition(state) => drop(self.sender.send_replace(state)), + } + } + + fn is_closed(&self) -> bool { + self.inner.queue.is_empty() && self.sender.is_closed() + } +} + +impl Drop for IcebergInputReader { + fn drop(&mut self) { + self.disconnect(); + } +} + +struct IcebergInputEndpointInner { + endpoint_name: String, + config: IcebergReaderConfig, + consumer: Box, + datafusion: SessionContext, + queue: InputQueue, +} + +impl IcebergInputEndpointInner { + fn new( + endpoint_name: &str, + config: IcebergReaderConfig, + consumer: Box, + ) -> Self { + let queue = InputQueue::new(consumer.clone()); + Self { + endpoint_name: endpoint_name.to_string(), + config, + consumer, + datafusion: SessionContext::new(), + queue, + } + } + + fn table_ident(&self) -> Option> { + self.config.table_name.as_ref().map(|table_name| { + TableIdent::from_strs(table_name.split('.')).map_err(|e| { + ControllerError::invalid_transport_configuration( + &self.endpoint_name, + &format!("'table_name' property specifies an invalid Iceberg table name: {e}"), + ) + }) + }) + } + + fn snapshot_descr(&self) -> Result { + match &self.config { + IcebergReaderConfig { + snapshot_id: 
Some(_), + datetime: Some(_), + .. + } => Err(ControllerError::invalid_transport_configuration( + &self.endpoint_name, + "at most one of 'snapshot_id' and 'datetime' options can be specified", + )), + IcebergReaderConfig { + snapshot_id: None, + datetime: None, + .. + } => Ok(SnapshotDescr::Latest), + IcebergReaderConfig { + snapshot_id: Some(snapshot_id), + datetime: None, + .. + } => Ok(SnapshotDescr::SnapshotId(*snapshot_id)), + IcebergReaderConfig { + snapshot_id: None, + datetime: Some(datetime), + .. + } => { + let ts = DateTime::parse_from_rfc3339(datetime) + .map_err(|e| { + ControllerError::invalid_transport_configuration( + &self.endpoint_name, + &format!( + "invalid 'datetime' format (expected ISO-8601/RFC-3339 timestamp): {e}" + ), + ) + })? + .to_utc(); + Ok(SnapshotDescr::Timestamp(ts)) + } + } + } + + async fn worker_task( + self: Arc, + input_stream: Box, + schema: Relation, + receiver: Receiver, + init_status_sender: mpsc::Sender>, + ) { + let mut receiver_clone = receiver.clone(); + select! { + _ = Self::worker_task_inner(self.clone(), input_stream, schema, receiver, init_status_sender) => { + debug!("iceberg {}: worker task terminated", + &self.endpoint_name, + ); + } + _ = receiver_clone.wait_for(|state| state == &PipelineState::Terminated) => { + debug!("iceberg {}: received termination command; worker task canceled", + &self.endpoint_name, + ); + } + } + } + + /// Load the entire table snapshot as a single "select * where " query. + async fn read_unordered_snapshot( + &self, + input_stream: &mut dyn ArrowStream, + receiver: &mut Receiver, + ) { + // Execute the snapshot query; push snapshot data to the circuit. 
+ info!("iceberg {}: reading initial snapshot", &self.endpoint_name,); + + let mut snapshot_query = "select * from snapshot".to_string(); + if let Some(filter) = &self.config.snapshot_filter { + snapshot_query = format!("{snapshot_query} where {filter}"); + } + + self.execute_snapshot_query(&snapshot_query, "initial snapshot", input_stream, receiver) + .await; + + //let _ = self.datafusion.deregister_table("snapshot"); + info!( + "iceberg {}: finished reading initial snapshot", + &self.endpoint_name, + ); + } + + async fn read_ordered_snapshot( + &self, + input_stream: &mut dyn ArrowStream, + schema: &Relation, + receiver: &mut Receiver, + ) { + self.read_ordered_snapshot_inner(input_stream, schema, receiver) + .await + .unwrap_or_else(|e| self.consumer.error(true, e)); + } + + async fn read_ordered_snapshot_inner( + &self, + input_stream: &mut dyn ArrowStream, + schema: &Relation, + receiver: &mut Receiver, + ) -> Result<(), AnyError> { + let timestamp_column = self.config.timestamp_column.as_ref().unwrap(); + + let timestamp_field = schema.field(timestamp_column).unwrap(); + + // The following unwraps are safe, as validated in `validate_timestamp_column`. + let lateness = timestamp_field.lateness.as_ref().unwrap(); + + // Query the table for min and max values of the timestamp column that satisfy the filter. 
+ let bounds_query = + format!("select * from (select cast(min({timestamp_column}) as string) as start_ts, cast(max({timestamp_column}) as string) as end_ts from snapshot {}) where start_ts is not null", + if let Some(filter) = &self.config.snapshot_filter { + format!("where {filter}") + } else { + String::new() + }); + + let bounds = execute_query_collect(&self.datafusion, &bounds_query).await?; + + info!( + "iceberg {}: querying the table for min and max timestamp values", + &self.endpoint_name, + ); + + if bounds.len() != 1 || bounds[0].num_rows() != 1 { + info!( + "iceberg {}: initial snapshot is empty; the Delta table contains no records{}", + &self.endpoint_name, + if let Some(filter) = &self.config.snapshot_filter { + format!(" that satisfy the filter condition '{filter}'") + } else { + String::new() + } + ); + return Ok(()); + } + + if bounds[0].num_columns() != 2 { + // Should never happen. + return Err(anyhow!( + "internal error: query '{bounds_query}' returned a result with {} columns; expected 2 columns", + bounds[0].num_columns() + )); + } + + let min = bounds[0] + .column(0) + .as_string_opt::() + .ok_or_else(|| anyhow!("internal error: cannot retrieve the output of query '{bounds_query}' as a string"))? + .value(0) + .to_string(); + + let max = bounds[0].column(1).as_string::().value(0).to_string(); + + info!( + "iceberg {}: reading table snapshot in the range '{min} <= {timestamp_column} <= {max}'", + &self.endpoint_name, + ); + + let min = timestamp_to_sql_expression(×tamp_field.columntype, &min); + let max = timestamp_to_sql_expression(×tamp_field.columntype, &max); + + let mut start = min.clone(); + let mut done = "false".to_string(); + + while &done != "true" { + // Evaluate SQL expression for the new end of the interval. 
+ let end = execute_singleton_query( + &self.datafusion, + &format!("select cast(({start} + {lateness}) as string)"), + ) + .await?; + let end = timestamp_to_sql_expression(×tamp_field.columntype, &end); + + // Query the table for the range. + let mut range_query = + format!("select * from snapshot where {timestamp_column} >= {start} and {timestamp_column} < {end}"); + if let Some(filter) = &self.config.snapshot_filter { + range_query = format!("{range_query} and {filter}"); + } + + self.execute_snapshot_query(&range_query, "range", input_stream, receiver) + .await; + + start = end.clone(); + + done = execute_singleton_query( + &self.datafusion, + &format!("select cast({start} > {max} as string)"), + ) + .await?; + } + + Ok(()) + } + + async fn worker_task_inner( + self: Arc, + mut input_stream: Box, + schema: Relation, + mut receiver: Receiver, + init_status_sender: mpsc::Sender>, + ) { + let table = match self.open_table().await { + Err(e) => { + let _ = init_status_sender.send(Err(e)).await; + return; + } + Ok(table) => table, + }; + + let table = Arc::new(table); + + if let Err(e) = self.prepare_snapshot_query(&table, &schema).await { + let _ = init_status_sender.send(Err(e)).await; + return; + }; + + // Code before this point is part of endpoint initialization. + // After this point, the thread should continue running until it receives a + // shutdown command from the controller. + let _ = init_status_sender.send(Ok(())).await; + + if self.config.snapshot() && self.config.timestamp_column.is_none() { + // Read snapshot chunk-by-chunk. + self.read_unordered_snapshot(input_stream.as_mut(), &mut receiver) + .await; + } else if self.config.snapshot() { + // Read the entire snapshot in one query. + self.read_ordered_snapshot(input_stream.as_mut(), &schema, &mut receiver) + .await; + }; + + self.consumer.eoi(); + } + + /// Open existing iceberg table. Use snapshot id or timestamp specified in the configuration, if any. 
+ async fn open_table(&self) -> Result { + debug!("iceberg {}: opening iceberg table", &self.endpoint_name); + + match self.config.catalog_type { + None => self.open_table_no_catalog().await, + Some(IcebergCatalogType::Glue) => self.open_table_glue().await, + Some(IcebergCatalogType::Rest) => self.open_table_rest().await, + } + + // // TODO: Validate that table schema matches relation schema + + // // TODO: Validate that timestamp is a valid column. + } + + async fn open_table_no_catalog(&self) -> Result { + // Safe due to checks in 'validate_catalog_config'. + let metadata_location = self.config.metadata_location.as_ref().unwrap(); + + let file_io = FileIO::from_path(metadata_location) + .map_err(|e| { + ControllerError::invalid_transport_configuration( + &self.endpoint_name, + &format!("invalid 'metadata_location' value: {e}"), + ) + })? + .with_props(&self.config.fileio_config) + .build() + .map_err(|e| { + ControllerError::invalid_transport_configuration( + &self.endpoint_name, + &format!("invalid storage configuration: {e}"), + ) + })?; + + let metadata_file = file_io.new_input(metadata_location).map_err(|e| { + ControllerError::invalid_transport_configuration( + &self.endpoint_name, + &format!("error opening metadata file at '{metadata_location}': {e}"), + ) + })?; + let metadata_content = metadata_file.read().await.map_err(|e| { + ControllerError::invalid_transport_configuration( + &self.endpoint_name, + &format!("error reading metadatafile '{metadata_location}': {e}"), + ) + })?; + let metadata = serde_json::from_slice::(&metadata_content).map_err(|e| { + ControllerError::invalid_transport_configuration( + &self.endpoint_name, + &format!("error parsing table metadata: {e}"), + ) + })?; + + let table_ident = TableIdent::from_strs(["default", "table"]).unwrap(); + + IcebergTable::builder() + .file_io(file_io) + .metadata_location(metadata_location) + .metadata(metadata) + .identifier(table_ident) + .build() + .map_err(|e| { + 
ControllerError::invalid_transport_configuration( + &self.endpoint_name, + &format!("error configuring Iceberg table: {e}"), + ) + }) + } + + async fn open_table_glue(&self) -> Result { + let builder = GlueCatalogConfig::builder().warehouse( + self.config + .glue_catalog_config + .warehouse + .as_ref() + .unwrap() + .clone(), + ); + + let builder = builder.catalog_id_opt(self.config.glue_catalog_config.id.clone()); + let builder = builder.uri_opt(self.config.glue_catalog_config.endpoint.clone()); + + let mut props = self.config.fileio_config.clone(); + + self.config + .glue_catalog_config + .access_key_id + .as_ref() + .map(|aws_access_key_id| { + props.insert(AWS_ACCESS_KEY_ID.to_string(), aws_access_key_id.clone()) + }); + + self.config + .glue_catalog_config + .secret_access_key + .as_ref() + .map(|aws_secret_access_key| { + props.insert( + AWS_SECRET_ACCESS_KEY.to_string(), + aws_secret_access_key.clone(), + ) + }); + + self.config + .glue_catalog_config + .session_token + .as_ref() + .map(|session_token| { + props.insert(AWS_SESSION_TOKEN.to_string(), session_token.clone()) + }); + + self.config + .glue_catalog_config + .profile_name + .as_ref() + .map(|profile_name| props.insert(AWS_PROFILE_NAME.to_string(), profile_name.clone())); + + self.config + .glue_catalog_config + .region + .as_ref() + .map(|region_name| props.insert(AWS_REGION_NAME.to_string(), region_name.clone())); + + let builder = builder.props(props); + + let catalog_config = builder.build(); + + let catalog = GlueCatalog::new(catalog_config).await.map_err(|e| { + ControllerError::input_transport_error( + &self.endpoint_name, + true, + anyhow!("error creating Glue catalog client: {e}"), + ) + })?; + + let table_ident = self.table_ident().unwrap()?; + + catalog.load_table(&table_ident).await.map_err(|e| { + ControllerError::input_transport_error( + &self.endpoint_name, + true, + anyhow!("error loading Iceberg table: {e}"), + ) + }) + } + + async fn open_table_rest(&self) -> Result { + let builder 
= RestCatalogConfig::builder().uri( + self.config + .rest_catalog_config + .uri + .as_ref() + .unwrap() + .clone(), + ); + + let builder = builder.warehouse_opt(self.config.rest_catalog_config.warehouse.clone()); + + let mut props = self.config.fileio_config.clone(); + + self.config + .rest_catalog_config + .audience + .as_ref() + .map(|audience| props.insert("audience".to_string(), audience.clone())); + + self.config + .rest_catalog_config + .resource + .as_ref() + .map(|resource| props.insert("resource".to_string(), resource.clone())); + + self.config + .rest_catalog_config + .credential + .as_ref() + .map(|credential| props.insert("credential".to_string(), credential.clone())); + + self.config + .rest_catalog_config + .oauth2_server_uri + .as_ref() + .map(|oauth2_server_uri| { + props.insert("oauth2-server-uri".to_string(), oauth2_server_uri.clone()) + }); + + self.config + .rest_catalog_config + .prefix + .as_ref() + .map(|prefix| props.insert("prefix".to_string(), prefix.clone())); + + self.config + .rest_catalog_config + .scope + .as_ref() + .map(|scope| props.insert("scope".to_string(), scope.clone())); + + self.config + .rest_catalog_config + .token + .as_ref() + .map(|token| props.insert("token".to_string(), token.clone())); + + if let Some(headers) = &self.config.rest_catalog_config.headers { + for (header, val) in headers.iter() { + props.insert(format!("header.{header}"), val.clone()); + } + }; + + let builder = builder.props(props); + + let catalog_config = builder.build(); + + let catalog = RestCatalog::new(catalog_config); + + let table_ident = self.table_ident().unwrap()?; + + catalog.load_table(&table_ident).await.map_err(|e| { + ControllerError::input_transport_error( + &self.endpoint_name, + true, + anyhow!("error loading Iceberg table: {e}"), + ) + }) + } + + /// Validate the filter expression specified in the 'snapshot_filter' parameter. 
+ fn validate_snapshot_filter(&self) -> Result<(), ControllerError> { + if let Some(filter) = &self.config.snapshot_filter { + validate_sql_expression(filter).map_err(|e| { + ControllerError::invalid_transport_configuration( + &self.endpoint_name, + &format!("error parsing 'snapshot_filter' expression '{filter}': {e}"), + ) + })?; + } + + Ok(()) + } + + /// Prepare to read initial snapshot, if required by endpoint configuration. + /// + /// * register snapshot as a datafusion table + /// * validate snapshot config: filter condition and timestamp column + async fn prepare_snapshot_query( + &self, + table: &IcebergTable, + schema: &Relation, + ) -> Result<(), ControllerError> { + if !self.config.snapshot() { + return Ok(()); + } + + trace!( + "iceberg {}: registering table with Datafusion", + &self.endpoint_name, + ); + + let snapshot_id = match self.snapshot_descr()? { + SnapshotDescr::SnapshotId(snapshot_id) => Some(snapshot_id), + SnapshotDescr::Timestamp(ts) => { + let ts_ms = ts.timestamp_millis(); + let snapshot_log = table + .metadata() + .history() + .iter() + .rev() + .find(|log| log.timestamp_ms() <= ts_ms); + if let Some(snapshot_log) = snapshot_log { + Some(snapshot_log.snapshot_id) + } else { + return Err(ControllerError::input_transport_error( + &self.endpoint_name, + true, + anyhow!("Iceberg connector configuration specifies timestamp {ts}; however Iceberg table does not contain a snapshot with the same or earlier timestamp"), + )); + } + } + SnapshotDescr::Latest => None, + }; + + let provider = match snapshot_id { + Some(snapshot_id) => { + IcebergTableProvider::try_new_from_table_snapshot(table.clone(), snapshot_id).await + } + None => IcebergTableProvider::try_new_from_table(table.clone()).await, + } + .map_err(|e| { + ControllerError::invalid_transport_configuration( + &self.endpoint_name, + &format!("error creating Datafusion table provider: {e}"), + ) + })?; + + self.datafusion + .register_table("snapshot", Arc::new(provider)) + .map_err(|e| { + 
ControllerError::input_transport_error( + &self.endpoint_name, + true, + anyhow!("failed to register table snapshot with datafusion: {e}"), + ) + })?; + + self.validate_snapshot_filter()?; + + if let Some(timestamp_column) = &self.config.timestamp_column { + validate_timestamp_column( + &self.endpoint_name, + timestamp_column, + &self.datafusion, + schema, + "see Iceberg connector documentation for more details: https://docs.feldera.com/connectors/sources/iceberg" + ) + .await?; + }; + + Ok(()) + } + + /// Execute a SQL query to load a complete or partial snapshot of the table. + async fn execute_snapshot_query( + &self, + query: &str, + descr: &str, + input_stream: &mut dyn ArrowStream, + receiver: &mut Receiver, + ) { + let descr = format!("{descr} query '{query}'"); + debug!( + "iceberg {}: retrieving data from the Iceberg table snapshot using {descr}", + &self.endpoint_name, + ); + + let options: SQLOptions = SQLOptions::new() + .with_allow_ddl(false) + .with_allow_dml(false); + + let df = match self.datafusion.sql_with_options(query, options).await { + Ok(df) => df, + Err(e) => { + self.consumer + .error(true, anyhow!("error compiling query '{query}': {e}")); + return; + } + }; + + self.execute_df(df, true, &descr, input_stream, receiver) + .await; + } + + /// Execute a prepared dataframe and push data from it to the circuit. + /// + /// * `polarity` - determines whether records in the dataframe should be + /// inserted to or deleted from the table. + /// + /// * `descr` - dataframe description used to construct error message. + /// + /// * `input_stream` - handle to push updates to. + /// + /// * `receiver` - used to block the function until the endpoint is unpaused. 
+ async fn execute_df( + &self, + dataframe: DataFrame, + polarity: bool, + descr: &str, + input_stream: &mut dyn ArrowStream, + receiver: &mut Receiver, + ) { + wait_running(receiver).await; + + let mut stream = match dataframe.execute_stream().await { + Err(e) => { + self.consumer + .error(true, anyhow!("error retrieving {descr}: {e:?}")); + return; + } + Ok(stream) => stream, + }; + + let mut num_batches = 0; + while let Some(batch) = stream.next().await { + wait_running(receiver).await; + let batch = match batch { + Ok(batch) => batch, + Err(e) => { + self.consumer.error( + false, + anyhow!("error retrieving batch {num_batches} of {descr}: {e:?}"), + ); + continue; + } + }; + // info!("schema: {}", batch.schema()); + num_batches += 1; + let bytes = batch.get_array_memory_size(); + let result = if polarity { + input_stream.insert(&batch) + } else { + input_stream.delete(&batch) + }; + let errors = result.map_or_else( + |e| { + vec![ParseError::bin_envelope_error( + format!("error deserializing table records from Parquet data: {e}"), + &[], + None, + )] + }, + |()| Vec::new(), + ); + self.queue.push((input_stream.take_all(), errors), bytes); + } + } +} + +/// Block until the state is `Running`. +async fn wait_running(receiver: &mut Receiver) { + // An error indicates that the channel was closed. It's ok to ignore + // the error as this situation will be handled by the top-level select, + // which will abort the worker thread. 
+ let _ = receiver + .wait_for(|state| state == &PipelineState::Running) + .await; +} diff --git a/crates/iceberg/src/lib.rs b/crates/iceberg/src/lib.rs new file mode 100644 index 0000000000..c1e278af11 --- /dev/null +++ b/crates/iceberg/src/lib.rs @@ -0,0 +1,22 @@ +mod input; + +pub use input::IcebergInputEndpoint; + +use feldera_types::serde_with_context::{ + serde_config::DecimalFormat, DateFormat, SqlSerdeConfig, TimestampFormat, +}; + +pub fn iceberg_input_serde_config() -> SqlSerdeConfig { + SqlSerdeConfig::default() + // Iceberg supports microsecond or nanosecond timestamps. + // `serde_arrow` knows the correct type from the Arrow schema, and its + // `Deserializer` implementation is nice enough to return the timestamp + // formatted as string if the `Deserialize` implementation asks for it + // (by calling `deserialize_str`), so we rely on that instead of trying + // to deserialize the timestamp as an integer. A better solution would + // require a more flexible SqlSerdeConfig type that would specify a + // schema per field. + .with_timestamp_format(TimestampFormat::String("%Y-%m-%dT%H:%M:%S%.f%Z")) + .with_date_format(DateFormat::DaysSinceEpoch) + .with_decimal_format(DecimalFormat::String) +} diff --git a/crates/iceberg/src/test/README.md b/crates/iceberg/src/test/README.md new file mode 100644 index 0000000000..a89921ad1a --- /dev/null +++ b/crates/iceberg/src/test/README.md @@ -0,0 +1,51 @@ +# Iceberg connector test harness + +Iceberg connector tests live in `adapters/srs/tests/iceberg.rs`. Since Iceberg's +Rust ecosystem is not yet self-contained, tests rely on several external components +and are feature gated using: + +* `iceberg-tests-fs` - enables tests using local file system +* `iceberg-tests-glue` - enables tests using S3 and AWS Glue catalog +* `iceberg-tests-rest` - enables tests using S3 and Rest catalog. 
+
+The `iceberg-rust` crate does not yet support writing Iceberg tables; therefore
+we use `pyiceberg` to create test tables for the Iceberg source connector. The
+`create_test_table_s3.py` script in this directory can be used to create an
+Iceberg table. By default it creates the table in the local FS. It can also be
+used with the `--catalog glue` flag to create a table in S3 using AWS Glue catalog.
+We used the latter feature to create a test table in
+`s3://feldera-iceberg-test/test_table`. This table is used to run tests with
+Glue and REST catalogs (see below).
+
+## FS-based tests
+
+These tests create an Iceberg table in the local file system using the
+`create_test_table_s3.py` script and read this table using different configurations.
+Before running the tests, make sure that you have Python dependencies listed
+in `./requirements.txt` installed:
+
+* `pip install -r crates/iceberg/src/test/requirements.txt`
+* Run the following command in the `adapters` crate: `cargo test --features="iceberg-tests-fs" iceberg`
+
+## Glue catalog test
+
+* Set `ICEBERG_TEST_AWS_ACCESS_KEY_ID` and `ICEBERG_TEST_AWS_SECRET_ACCESS_KEY` environment
+  variables to AWS credentials of an IAM account that has access to the Glue catalog and
+  S3 buckets used by Feldera CI (talk to leonid@feldera.com).
+* Run the following command in the `adapters` crate: `cargo test --features="iceberg-tests-glue" iceberg_glue_s3_input_test`
+
+## Rest catalog test
+
+In order to run this test, we need an Iceberg REST catalog implementation. The AWS Glue catalog
+provides one, but unfortunately it is not currently usable with the `iceberg-rust` crate, which
+does not yet implement the AWS SigV4 protocol (see SigV4-related config options here:
+https://py.iceberg.apache.org/configuration/#rest-catalog).
Therefore we instead use a standalone
+implementation of the REST catalog from Databricks and bind it to the Glue catalog as the backend:
+
+* Create catalog: `docker run -e AWS_REGION=us-east-1 -e AWS_ACCESS_KEY_ID=<access-key-id> -e AWS_SECRET_ACCESS_KEY=<secret-access-key> -e CATALOG_CATALOG__IMPL=org.apache.iceberg.aws.glue.GlueCatalog -p 8181:8181 tabulario/iceberg-rest:0.1.0`
+
+* `cargo test --features="iceberg-tests-rest" iceberg_rest_s3_input_test`
+
+# Running tests in CI
+
+Currently only Glue and FS-based tests run in CI.
diff --git a/crates/iceberg/src/test/create_test_table_s3.py b/crates/iceberg/src/test/create_test_table_s3.py
new file mode 100644
index 0000000000..4d62bb1f0e
--- /dev/null
+++ b/crates/iceberg/src/test/create_test_table_s3.py
@@ -0,0 +1,229 @@
+# Script used to create a table that matches the definition of TestStruct2
+
+from decimal import Decimal
+import random
+from pyiceberg.catalog.sql import SqlCatalog
+from pyiceberg.schema import Schema
+from pyiceberg.types import (
+    BooleanType,
+    DateType,
+    LongType,
+    MapType,
+    StringType,
+    StructType,
+    NestedField,
+    TimeType,
+    TimestampType,
+    IntegerType,
+    FloatType,
+    DoubleType,
+    DecimalType,
+    BinaryType,
+    FixedType
+)
+from pyiceberg.partitioning import PartitionSpec, PartitionField
+from pyiceberg.transforms import DayTransform
+from datetime import time, timedelta
+
+import datetime
+import os
+import sys
+import pyarrow as pa
+import pandas as pd
+import numpy as np
+import argparse
+
+parser = argparse.ArgumentParser(
+    description="Create an Iceberg table populated with random data"
+)
+
+parser.add_argument(
+    "--catalog",
+    choices=["glue", "rest", "sql"],
+    default="sql",
+    help="Catalog type (default: sql)",
+)
+parser.add_argument(
+    "--warehouse-path",
+    default="/tmp/warehouse",
+    help="Location to create the warehouse; only used in conjunction with '--catalog=sql' in (default: /tmp/warehouse)",
+)
+parser.add_argument(
+    "--rows",
+    type=int,
+    default=1000000,
+    help="Number of rows to
generate (default: 1000000)", +) +parser.add_argument("--json-file", help="JSON file to load data from") + + +args = parser.parse_args() + + +if args.catalog == "glue": + from pyiceberg.catalog.glue import GlueCatalog + + aws_access_key_id = os.getenv("AWS_ACCESS_KEY_ID") + aws_secret_access_key = os.getenv("AWS_SECRET_ACCESS_KEY") + + if not aws_access_key_id: + print("Error: AWS_ACCESS_KEY_ID is not set") + sys.exit(1) + + if not aws_secret_access_key: + print("Error: AWS_SECRET_ACCESS_KEY is not set") + sys.exit(1) + + print("Connecting to Glue catalog") + catalog = GlueCatalog( + "glue", + **{ + "glue.access-key-id": aws_access_key_id, + "glue.secret-access-key": aws_secret_access_key, + "glue.region": "us-east-1", + "s3.access-key-id": aws_access_key_id, + "s3.secret-access-key": aws_secret_access_key, + "s3.region": "us-east-1", + }, + ) + location = "s3://feldera-iceberg-test/test_table" +elif args.catalog == "rest": + print("REST catalog not yet supported") + exit(1) +else: + warehouse_path = args.warehouse_path + location = f"{warehouse_path}/test_table" + + print(f"Creating SQL catalog at {warehouse_path}") + + catalog = SqlCatalog( + "sql", + **{ + "uri": f"sqlite:///{warehouse_path}/pyiceberg_catalog.db", + "warehouse": f"file://{warehouse_path}", + }, + ) + + try: + catalog.create_namespace("iceberg_test") + except: + pass + +# Iceberg schema (matches `IcebergTestStruct`) +schema = Schema( + NestedField(1, "b", BooleanType(), required=True), + NestedField(2, "i", IntegerType(), required=True), + NestedField(3, "l", LongType(), required=True), + NestedField(4, "r", FloatType(), required=True), + NestedField(5, "d", DoubleType(), required=True), + NestedField(6, "dec", DecimalType(10, 3), required=True), + NestedField(7, "dt", DateType(), required=True), + NestedField(8, "tm", TimeType(), required=True), + NestedField(9, "ts", TimestampType(), required=True), + NestedField(10, "s", StringType(), required=True), + # NestedField(11, "uuid", UUIDType(), 
required=True), + NestedField(11, "fixed", FixedType(5), required=True), + NestedField(12, "varbin", BinaryType(), required=True), +) + +# Equivalent arrow schema +arrow_schema = pa.schema( + [ + pa.field("b", pa.bool_(), nullable=False), + pa.field("i", pa.int32(), nullable=False), + pa.field("l", pa.int64(), nullable=False), + pa.field("r", pa.float32(), nullable=False), + pa.field("d", pa.float64(), nullable=False), + pa.field("dec", pa.decimal128(10, 3), nullable=False), + pa.field("dt", pa.date32(), nullable=False), + pa.field("tm", pa.time64("us"), nullable=False), + pa.field("ts", pa.timestamp("us"), nullable=False), + pa.field("s", pa.string(), nullable=False), + # pa.field("uuid", pa.binary(16), nullable=False), + pa.field("fixed", pa.binary(5), nullable=False), + pa.field("varbin", pa.binary(), nullable=False), + ] +) + +partition_spec = PartitionSpec( + PartitionField(source_id=9, field_id=1000, transform=DayTransform(), name="date") +) + +try: + print("Deleting existing table, if any") + catalog.drop_table("iceberg_test.test_table") +except: + pass + +print("Creating Iceberg table") + +table = catalog.create_table( + "iceberg_test.test_table", schema, location=location, partition_spec=partition_spec +) + +# Number of records +num_records = args.rows + +print(f"Generating {num_records} rows") + +# Generate random data +if args.json_file: + with open(args.json_file, "r") as file: + for i, line in enumerate(file): + print(line) + if i == 4: + break + + pandas_df = pd.read_json(args.json_file, lines=True) + pandas_df["tm"] = pd.to_datetime(pandas_df["tm"]).dt.time + pandas_df["ts"] = pd.to_datetime(pandas_df["ts"]) + pandas_df["dt"] = pd.to_datetime(pandas_df["dt"]).dt.date + pandas_df['dec'] = pandas_df["dec"].apply(lambda x: Decimal(f"{x:.3f}")) + # pandas_df['uuid'] = pandas_df['uuid'].apply(lambda x: bytes(x)) + pandas_df['fixed'] = pandas_df['fixed'].apply(lambda x: bytes(x)) + pandas_df['varbin'] = pandas_df['varbin'].apply(lambda x: bytes(x)) + 
+else: + # Generate a range of dates between 2024-01-01 and 2024-12-31 + date_range = pd.date_range(start='2024-01-01', end='2024-12-31') + + data = { + "b": np.random.choice([True, False], size=num_records), # Boolean + "i": np.arange(1, num_records + 1, dtype=np.int32), + "l": np.random.randint(np.iinfo(np.int64).min, np.iinfo(np.int64).max, size=num_records, dtype=np.int64), # int64 + "r": np.random.uniform(-1e6, 1e6, size=num_records).astype(np.float32), # float32 + "d": np.random.uniform(-1e12, 1e12, size=num_records).astype(np.float64), # float64 + "dec": [Decimal(random.uniform(-1e5, 1e5)).quantize(Decimal("0.001")) for _ in range(num_records)], + #"dt": pd.date_range(start="2000-01-01", periods=num_records, freq="D").date, # date32 + "dt": [date_range[i % len(date_range)] for i in range(num_records)], + "tm": [time(random.randint(0, 23), random.randint(0, 59), random.randint(0, 59), random.randint(0, 999999)) for _ in range(num_records)], + "ts": [ + datetime.datetime(2023, 1, 1) + datetime.timedelta(seconds=i) + for i in range(num_records) + ], + "s": [f"string_{i}" for i in range(num_records)], # string + # "uuid": [uuid.uuid4().bytes for _ in range(num_records)], # binary(16) - UUID + "fixed": [np.random.bytes(5) for _ in range(num_records)], # fixed binary(5) + "varbin": [np.random.bytes(np.random.randint(1, 20)) for _ in range(num_records)] # variable-length binary + } + + # Create the DataFrame + pandas_df = pd.DataFrame(data) + + #print(pandas_df.head()) + +print("Generating Pandas dataframe") + +# pyiceberg does not support nanosecond timestamps +pandas_df["ts"] = pandas_df["ts"].astype("datetime64[us]") + +print("Converting Pandas dataframe to Arrow") + +arrow_table = pa.Table.from_pandas(pandas_df, schema=arrow_schema) + +print("Writing data to Iceberg table") + +table.append(arrow_table) + +# Extract and print metadata location. 
+print(table.inspect.metadata_log_entries().column("file")[-1].as_py()) diff --git a/crates/iceberg/src/test/requirements.ci.txt b/crates/iceberg/src/test/requirements.ci.txt new file mode 100644 index 0000000000..66a8023764 --- /dev/null +++ b/crates/iceberg/src/test/requirements.ci.txt @@ -0,0 +1,5 @@ +# Only include requirements used in CI (no s3 or glue pyiceberg features). +numpy==2.2.0 +pandas==2.2.3 +pyarrow==17.0.0 +pyiceberg[sql-sqlite]==0.8.1 diff --git a/crates/iceberg/src/test/requirements.txt b/crates/iceberg/src/test/requirements.txt new file mode 100644 index 0000000000..cfd44ef9a5 --- /dev/null +++ b/crates/iceberg/src/test/requirements.txt @@ -0,0 +1,4 @@ +numpy==2.2.0 +pandas==2.2.3 +pyarrow==17.0.0 +pyiceberg[s3fs,glue,sql-sqlite]==0.8.1 diff --git a/crates/pipeline-manager/Cargo.toml b/crates/pipeline-manager/Cargo.toml index fe5642e9c5..216237b20d 100644 --- a/crates/pipeline-manager/Cargo.toml +++ b/crates/pipeline-manager/Cargo.toml @@ -36,7 +36,7 @@ utoipa = { version = "4.2", features = ["actix_extras", "chrono", "uuid"] } utoipa-swagger-ui = { version = "7.1", features = ["actix-web"] } chrono = { version = "0.4.38", default-features = false, features = ["clock", "serde"] } tempfile = { version = "3" } -futures-util = "0.3.28" +futures-util = "0.3.30" tokio-postgres = { version = "0.7", features = ["with-serde_json-1", "with-uuid-1", "with-chrono-0_4"]} async-trait = "0.1" colored = "2.0.0" diff --git a/crates/pipeline-manager/src/api/mod.rs b/crates/pipeline-manager/src/api/mod.rs index cb9979f2af..9fef6fafe9 100644 --- a/crates/pipeline-manager/src/api/mod.rs +++ b/crates/pipeline-manager/src/api/mod.rs @@ -204,7 +204,11 @@ The program version is used internally by the compiler to know when to recompile feldera_types::transport::delta_table::DeltaTableIngestMode, feldera_types::transport::delta_table::DeltaTableWriteMode, feldera_types::transport::delta_table::DeltaTableReaderConfig, - 
feldera_types::transport::delta_table::DeltaTableWriterConfig, + feldera_types::transport::iceberg::IcebergReaderConfig, + feldera_types::transport::iceberg::IcebergIngestMode, + feldera_types::transport::iceberg::IcebergCatalogType, + feldera_types::transport::iceberg::RestCatalogConfig, + feldera_types::transport::iceberg::GlueCatalogConfig, feldera_types::transport::http::Chunk, feldera_types::query::AdhocQueryArgs, feldera_types::query::AdHocResultFormat, diff --git a/crates/pipeline-manager/src/db/types/program.rs b/crates/pipeline-manager/src/db/types/program.rs index ccd023e774..4758f71c21 100644 --- a/crates/pipeline-manager/src/db/types/program.rs +++ b/crates/pipeline-manager/src/db/types/program.rs @@ -503,6 +503,7 @@ pub fn generate_program_info( | TransportConfig::UrlInput(_) | TransportConfig::S3Input(_) | TransportConfig::DeltaTableInput(_) + | TransportConfig::IcebergInput(_) | TransportConfig::Datagen(_) | TransportConfig::Nexmark(_) => {} _ => { diff --git a/docs/connectors/sources/delta.md b/docs/connectors/sources/delta.md index f682d33591..73aa08c469 100644 --- a/docs/connectors/sources/delta.md +++ b/docs/connectors/sources/delta.md @@ -1,5 +1,11 @@ # Delta Lake input connector +:::note +This page describes configuration options specific to the Delta Lake connector. +See [top-level connector documentation](/connectors/) for general information +about configuring input and output connectors. +::: + [Delta Lake](https://delta.io/) is an open-source storage framework for the [Lakehouse architecture](https://www.cidrdb.org/cidr2021/papers/cidr2021_paper17.pdf). It is typically used with the [Apache Spark](https://spark.apache.org/) runtime. @@ -61,8 +67,7 @@ one of `snapshot` or `snapshot_and_follow`, table rows are ingested in the times order, respecting the `LATENESS` annotation on the column: each ingested row has a timestamp no more than `LATENESS` time units earlier than the most recent timestamp of any previously ingested row. 
The ingestion is performed by partitioning the table -into timestamp ranges of width `LATENESS`. Each range is processed sequentially, -in increasing timestamp order. +into timestamp ranges of width `LATENESS` and ingesting ranges one by one in increasing timestamp order. Requirements: * The timestamp column must be of a supported type: integer, `DATE`, or `TIMESTAMP`. diff --git a/docs/connectors/sources/iceberg.md b/docs/connectors/sources/iceberg.md new file mode 100644 index 0000000000..afe6ef6a7b --- /dev/null +++ b/docs/connectors/sources/iceberg.md @@ -0,0 +1,318 @@ +# Apache Iceberg input connector + +:::note +This page describes configuration options specific to the Apache Iceberg connector. +See [top-level connector documentation](/connectors/) for general information +about configuring input and output connectors. +::: + +:::warning + +Iceberg support is still experimental, and it may be substantially modified in the future. + +::: + +The Iceberg input connector enables data ingestion from an Apache Iceberg table into +a Feldera pipeline. Currently, the connector supports batch reads, allowing users to +load a static snapshot of the table. However, it does not yet support ingesting +incremental changes. Incremental ingestion capabilities are planned for future releases. + +The connector is compatible with REST and AWS Glue catalogs and also supports direct +table reads without a catalog, provided the location of the metadata file. Supported +storage systems include S3, GCS, and local file systems. + +The Iceberg input connector does not yet support [fault tolerance](..#fault-tolerance). + + +## Configuration + +| Property | Type | Description | +|-----------------------------|--------|---------------| +| `mode`* | enum | Table read mode. Currently, the only supported mode is `snapshot`, in which the connector reads a snapshot of the table and stops.| +| `snapshot_filter` | string |

Optional row filter. When specified, only rows that satisfy the filter condition are included in the snapshot. The condition must be a valid SQL Boolean expression that can be used in the `where` clause of the `select * from snapshot where ...` query.

This option can be used to specify the range of event times to include in the snapshot, e.g.: `ts BETWEEN TIMESTAMP '2005-01-01 00:00:00' AND TIMESTAMP '2010-12-31 23:59:59'`.

+| `snapshot_id` | integer|

Optional table snapshot id. When this option is set, the connector reads the specified snapshot of the table.

Note: at most one of `snapshot_id` and `datetime` options can be specified. When neither of the two options is specified, the latest snapshot of the table is used.

+| `datetime` | string |

Optional timestamp for the snapshot in the ISO-8601/RFC-3339 format, e.g., "2024-12-09T16:09:53+00:00". When this option is set, the connector reads the snapshot of the table as of the specified point in time (based on the server time recorded in the transaction log, not the event time encoded in the data).

Note: at most one of `snapshot_id` and `datetime` options can be specified. When neither of the two options is specified, the latest snapshot of the table is used.

| +| `metadata_location` | string | Location of the table metadata JSON file. This property is used to access an Iceberg table directly, without a catalog. It is mutually exclusive with the `catalog_type` property.| +| `table_name` | string | Specifies the Iceberg table name within the catalog in the `namespace.table` format. This option is applicable when an Iceberg catalog is configured using the `catalog_type` property.| +| `catalog_type` | enum | Type of the Iceberg catalog used to access the table. Supported options include `rest` and `glue`. This property is mutually exclusive with `metadata_location`.| + + + + +[*]: Required fields + +### Rest catalog configuration + +The following properties are used when `catalog_type` is set to `rest` to configure access to an Iceberg REST catalog. + +| Property | Type | Description | +|-----------------------------|---------------------|---------------| +| `rest.uri`* | string | URI identifying the REST catalog server| +| `rest.warehouse` | string | The default location for managed tables created by the catalog.| +| `rest.oauth2-server-uri` | string | Authentication URL to use for client credentials authentication (default: `uri` + `v1/oauth/tokens`)| +| `rest.credential` | string | Credential to use for OAuth2 credential flow when initializing the catalog. A key and secret pair separated by ":" (key is optional).| +| `rest.token` | string | Bearer token value to use for `Authorization` header.| +| `rest.scope` | string | Desired scope of the requested security token (default: catalog).| +| `rest.prefix` | string | Customize table storage paths. 
When combined with the `warehouse` property, the prefix determines how table data is organized within the storage.| +| `rest.audience` | string | Logical name of target resource or service.| +| `rest.resource` | string | URI for the target resource or service.| +| `rest.headers` | [(string, string)] | Additional HTTP request headers added to each catalog REST API call.| + +[*]: These fields are required when the `catalog_type` property is set to `rest`. + +### Glue catalog configuration + +The following properties are used when `catalog_type` is set to `glue` to configure access to the AWS Glue catalog. + +| Property | Type | Description | +|-----------------------------|--------|---------------| +| `glue.warehouse`* | string | Location for table metadata. Example: `s3://my-data-warehouse/tables/`| +| `glue.endpoint` | string | Configure an alternative endpoint of the Glue service for Glue catalog to access. Example: `https://glue.us-east-1.amazonaws.com`| +| `glue.access-key-id` | string | Access key id used to access the Glue catalog.| +| `glue.secret-access-key` | string | Secret access key used to access the Glue catalog.| +| `glue.profile-name` | string | Profile used to access the Glue catalog.| +| `glue.region` | string | Region of the Glue catalog.| +| `glue.session-token` | string | Static session token used to access the Glue catalog.| +| `glue.id` | string | The 12-digit ID of the Glue catalog.| + +[*]: These fields are required when the `catalog_type` property is set to `glue`. + +### FileIO configuration + +Iceberg works with the concept of a FileIO which is a pluggable module for reading, writing, and deleting files. +Feldera currently supports S3, GCS, and file system-based FileIO implementations. The Iceberg connector detects +the correct type of FileIO from the prefix of the Iceberg table location: + +* `s3://`, `s3a://` - S3. +* `gs://` - Google Cloud Storage. +* `file://` or no prefix - local file system. 
+ +S3 and GCP FileIO implementations require additional configuration options documented below. + +#### S3 FileIO configuration + +| Property | Type | Description | +|-----------------------------|--------|---------------| +| `glue.warehouse`* | string | Location for table metadata. Example: `s3://my-data-warehouse/tables/`| +| `s3.access-key-id` | string | S3 access key id.| +| `s3.secret-access-key` | string | S3 secret access key.| +| `s3.endpoint` | string | Configure an alternative endpoint of the S3 service for the FileIO to access. This could be used to use S3 FileIO with any S3-compatible object storage service that has a different endpoint, or access a private S3 endpoint in a virtual private cloud.| +| `s3.region` | string | S3 region.| +| `s3.session-token` | string | S3 session token. This is required when using temporary credentials.| +| `s3.allow-anonymous` | string | Set to `"true"` to skip signing requests (e.g., for public buckets).| +| `s3.disable-ec2-metadata` | string | Set to `"true"` to skip loading the credential from EC2 metadata (typically used in conjunction with `s3.allow-anonymous`).| + +#### GCS FileIO configuration + +| Property | Type | Description | +|-----------------------------|--------|---------------| +| `gcs.project-id` | string | Google Cloud Project ID.| +| `gcs.service.path` | string | Google Cloud Storage endpoint.| +| `gcs.no-auth` | string | Set to `"true"` to allow unauthenticated requests.| +| `gcs.credentials-json` | string | Google Cloud Storage credentials JSON string, base64 encoded.| +| `gcs.oauth2.token` | string | String representation of the access token used for temporary access.| + +## Data type mapping + +The following table lists supported Iceberg data types and corresponding Feldera types. 
+ +| Iceberg type | Feldera SQL type | Comment | +|-----------------------------|------------------|---------------| +| `boolean` | `BOOLEAN` | | +| `int` | `INT`   | | +| `long` | `BIGINT`   | | +| `float` | `REAL` | | +| `double` | `DOUBLE` | | +| `decimal(P,S)` | `DECIMAL(P, S)` | The largest supported precision `P` is 28.| +| `date` | `DATE` | | +| `time` | `TIME` | | +| `timestamp` | `TIMESTAMP` | Timestamp values are rounded to the nearest millisecond.| +| `timestamp_ns` | `TIMESTAMP` | Timestamp values are rounded to the nearest millisecond.| +| `string` | `STRING` | | +| `fixed(L)` | `BINARY(L)` | | +| `binary` | `VARBINARY` | | + + + + +Types that are currently not supported include Iceberg's nested data types (`struct`s, +`list`s and `map`s), `uuid`, and timestamps with time zone. + + + + + + +## Examples + +### Read an Iceberg table from S3 through the AWS Glue catalog + +Create an Iceberg input connector to read a snapshot of a table stored in an S3 bucket +through the [AWS Glue Catalog](https://docs.aws.amazon.com/glue/). Note that the connector +configuration specifies separate AWS credentials — including the access key ID, secret +access key, and region — for the AWS Glue Catalog and the S3 bucket containing the table +data. These credentials can either be the same, when using a single IAM identity for both +services, or different, when using separate IAM identities. 
+ +```sql +create table iceberg_table( + id bigint, + name STRING, + b BOOLEAN, + ts TIMESTAMP, + dt DATE +) with ( + 'materialized' = 'true', + 'connectors' = '[{ + "transport": { + "name": "iceberg_input", + "config": { + "mode": "snapshot", + "glue.warehouse": "s3://feldera-iceberg-test/", + "catalog_type": "glue", + "table_name": "iceberg_test.test_table", + "glue.access-key-id": "", + "glue.secret-access-key": "", + "glue.region": "us-east-1", + "s3.access-key-id": "", + "s3.secret-access-key": "", + "s3.region": "us-east-1" + } + } + }]' +); +``` + +### Read an Iceberg table from S3 through a REST catalog + +Create an Iceberg input connector to read a snapshot of a table stored in an S3 bucket +through a REST catalog running on `http://localhost:8181`. + +```sql +create table iceberg_table( + id bigint, + name STRING, + b BOOLEAN, + ts TIMESTAMP, + dt DATE +) +with ( + 'materialized' = 'true', + 'connectors' = '[{ + "transport": { + "name": "iceberg_input", + "config": { + "mode": "snapshot", + "catalog_type": "rest", + "table_name": "iceberg_test.test_table", + "rest.uri": "http://localhost:8181", + "rest.warehouse": "s3://feldera-iceberg-test/", + "s3.access-key-id": "", + "s3.secret-access-key": "", + "s3.region": "us-east-1" + } + } + }]' +); +``` + +### Read an Iceberg table from local file system + +Read an Iceberg table from the local file system. Use the specified snapshot id. +Only select records with timestamp `2023-01-01 00:00:00` or later. 
+ +```sql +create table iceberg_table( + id bigint, + name STRING, + b BOOLEAN, + ts TIMESTAMP, + dt DATE +) with ( + 'materialized' = 'true', + 'connectors' = '[{ + "transport": { + "name": "iceberg_input", + "config": { + "mode": "snapshot", + "metadata_location": "file:///tmp/warehouse/test_table/metadata/00001-26093ae9-b816-40ca-8ca4-05bd445a8a1d.metadata.json", + "snapshot_id": 3325185130458326470, + "snapshot_filter": "ts >= ''2023-01-01 00:00:00''" + } + } + }]' +); +``` \ No newline at end of file diff --git a/docs/connectors/sources/s3.md b/docs/connectors/sources/s3.md index 74a3d23e2b..bc7483a633 100644 --- a/docs/connectors/sources/s3.md +++ b/docs/connectors/sources/s3.md @@ -11,8 +11,9 @@ It can be configured to load a single object or multiple objects selected based common S3 prefix. :::tip -When accessing an S3 bucket that stores data in the Delta Lake format, consider -using the [Delta Lake connector](/connectors/sources/delta) instead. +When accessing an S3 bucket that stores data in the Delta Lake or Iceberg format, consider +using the [Delta Lake connector](/connectors/sources/delta) or the +[Iceberg connector](/connectors/sources/iceberg) instead. ::: The S3 input connector supports [fault tolerance](..#fault-tolerance). 
diff --git a/docs/sidebars.js b/docs/sidebars.js index b5b1707ac2..4d221f7ed2 100644 --- a/docs/sidebars.js +++ b/docs/sidebars.js @@ -191,6 +191,11 @@ const sidebars = { id: 'connectors/sources/delta', label: 'Delta Lake' }, + { + type: 'doc', + id: 'connectors/sources/iceberg', + label: 'Apache Iceberg' + }, { type: 'doc', id: 'connectors/sources/kafka', diff --git a/openapi.json b/openapi.json index 689052183c..fe35f89860 100644 --- a/openapi.json +++ b/openapi.json @@ -2375,7 +2375,7 @@ }, "DeltaTableReaderConfig": { "type": "object", - "description": "Delta table output connector configuration.", + "description": "Delta table input connector configuration.", "required": [ "uri", "mode" @@ -2383,7 +2383,7 @@ "properties": { "datetime": { "type": "string", - "description": "Optional timestamp for the snapshot in the ISO-8601/RFC-3339 format, e.g.,\n\"2024-12-09T16:09:53+00:00.\n\nWhen this option is set, the connector finds and opens the version of the table as of the\nspecified point in time. In `snapshot` and `snapshot_and_follow` modes, it retrieves the\nsnapshot of this version of the table (based on the server time recorded in the transaction\nlog, not the event time encoded in the data). In `follow` and `snapshot_and_follow` modes, it\nfollows transaction log records **after** this version.\n\nNote: at most one of `version` and `datetime` options can be specified.\nWhen neither of the two options is specified, the latest committed version of the table\nis used.", + "description": "Optional timestamp for the snapshot in the ISO-8601/RFC-3339 format, e.g.,\n\"2024-12-09T16:09:53+00:00\".\n\nWhen this option is set, the connector finds and opens the version of the table as of the\nspecified point in time (based on the server time recorded in the transaction log, not the\nevent time encoded in the data). In `snapshot` and `snapshot_and_follow` modes, it\nretrieves the snapshot of this version of the table. 
In `follow` and `snapshot_and_follow`\nmodes, it follows transaction log records **after** this version.\n\nNote: at most one of `version` and `datetime` options can be specified.\nWhen neither of the two options is specified, the latest committed version of the table\nis used.", "nullable": true }, "mode": { @@ -2424,26 +2424,6 @@ "error_if_exists" ] }, - "DeltaTableWriterConfig": { - "type": "object", - "description": "Delta table output connector configuration.", - "required": [ - "uri" - ], - "properties": { - "mode": { - "$ref": "#/components/schemas/DeltaTableWriteMode" - }, - "uri": { - "type": "string", - "description": "Table URI." - } - }, - "additionalProperties": { - "type": "string", - "description": "Storage options for configuring backend object store.\n\nFor specific options available for different storage backends, see:\n* [Azure options](https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html)\n* [Amazon S3 options](https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html)\n* [Google Cloud Storage options](https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html)" - } - }, "Demo": { "type": "object", "required": [ @@ -2649,6 +2629,51 @@ } } }, + "GlueCatalogConfig": { + "type": "object", + "description": "AWS Glue catalog config.", + "properties": { + "glue.access-key-id": { + "type": "string", + "description": "Access key id used to access the Glue catalog.", + "nullable": true + }, + "glue.endpoint": { + "type": "string", + "description": "Configure an alternative endpoint of the Glue service for Glue catalog to access.\n\nExample: `\"https://glue.us-east-1.amazonaws.com\"`", + "nullable": true + }, + "glue.id": { + "type": "string", + "description": "The 12-digit ID of the Glue catalog.", + "nullable": true + }, + "glue.profile-name": { + "type": "string", + "description": "Profile used to access the Glue catalog.", + "nullable": true + }, + "glue.region": { + "type": 
"string", + "description": "Region of the Glue catalog.", + "nullable": true + }, + "glue.secret-access-key": { + "type": "string", + "description": "Secret access key used to access the Glue catalog.", + "nullable": true + }, + "glue.session-token": { + "type": "string", + "nullable": true + }, + "glue.warehouse": { + "type": "string", + "description": "Location for table metadata.\n\nExample: `\"s3://my-data-warehouse/tables/\"`", + "nullable": true + } + } + }, "HttpInputConfig": { "type": "object", "description": "Configuration for reading data via HTTP.\n\nHTTP input adapters cannot be usefully configured as part of pipeline\nconfiguration. Instead, instantiate them through the REST API as\n`/pipelines/{pipeline_name}/ingress/{table_name}`.", @@ -2667,6 +2692,87 @@ } } }, + "IcebergCatalogType": { + "type": "string", + "enum": [ + "rest", + "glue" + ] + }, + "IcebergIngestMode": { + "type": "string", + "description": "Iceberg table read mode.\n\nThree options are available:\n\n* `snapshot` - read a snapshot of the table and stop.\n\n* `follow` - continuously ingest changes to the table, starting from a specified snapshot\nor timestamp.\n\n* `snapshot_and_follow` - read a snapshot of the table before switching to continuous ingestion\nmode.", + "enum": [ + "snapshot", + "follow", + "snapshot_and_follow" + ] + }, + "IcebergReaderConfig": { + "allOf": [ + { + "$ref": "#/components/schemas/GlueCatalogConfig" + }, + { + "$ref": "#/components/schemas/RestCatalogConfig" + }, + { + "type": "object", + "required": [ + "mode" + ], + "properties": { + "catalog_type": { + "allOf": [ + { + "$ref": "#/components/schemas/IcebergCatalogType" + } + ], + "nullable": true + }, + "datetime": { + "type": "string", + "description": "Optional timestamp for the snapshot in the ISO-8601/RFC-3339 format, e.g.,\n\"2024-12-09T16:09:53+00:00\".\n\nWhen this option is set, the connector finds and opens the snapshot of the table as of the\nspecified point in time (based on the server time 
recorded in the transaction\nlog, not the event time encoded in the data). In `snapshot` and `snapshot_and_follow`\nmodes, it retrieves this snapshot. In `follow` and `snapshot_and_follow` modes, it\nfollows transaction log records **after** this snapshot.\n\nNote: at most one of `snapshot_id` and `datetime` options can be specified.\nWhen neither of the two options is specified, the latest committed version of the table\nis used.", + "nullable": true + }, + "metadata_location": { + "type": "string", + "description": "Location of the table metadata JSON file.\n\nThis propery is used to access an Iceberg table without a catalog. It is mutually\nexclusive with the `catalog_type` property.", + "nullable": true + }, + "mode": { + "$ref": "#/components/schemas/IcebergIngestMode" + }, + "snapshot_filter": { + "type": "string", + "description": "Optional row filter.\n\nThis option is only valid when `mode` is set to `snapshot` or `snapshot_and_follow`.\n\nWhen specified, only rows that satisfy the filter condition are included in the\nsnapshot. 
The condition must be a valid SQL Boolean expression that can be used in\nthe `where` clause of the `select * from snapshot where ...` query.\n\nThis option can be used to specify the range of event times to include in the snapshot,\ne.g.: `ts BETWEEN '2005-01-01 00:00:00' AND '2010-12-31 23:59:59'`.", + "nullable": true + }, + "snapshot_id": { + "type": "integer", + "format": "int64", + "description": "Optional snapshot id.\n\nWhen this option is set, the connector finds the specified snapshot of the table.\nIn `snapshot` and `snapshot_and_follow` modes, it loads this snapshot.\nIn `follow` and `snapshot_and_follow` modes, it follows table updates\n**after** this snapshot.\n\nNote: at most one of `snapshot_id` and `datetime` options can be specified.\nWhen neither of the two options is specified, the latest committed version of the table\nis used.", + "nullable": true + }, + "table_name": { + "type": "string", + "description": "Specifies the Iceberg table name in the \"namespace.table\" format.\n\nThis option is applicable when an Iceberg catalog is configured using the `catalog_type` property.", + "nullable": true + }, + "timestamp_column": { + "type": "string", + "description": "Table column that serves as an event timestamp.\n\nWhen this option is specified, and `mode` is one of `snapshot` or `snapshot_and_follow`,\ntable rows are ingested in the timestamp order, respecting the\n[`LATENESS`](https://docs.feldera.com/sql/streaming#lateness-expressions)\nproperty of the column: each ingested row has a timestamp no more than `LATENESS`\ntime units earlier than the most recent timestamp of any previously ingested row.\nThe ingestion is performed by partitioning the table into timestamp ranges of width\n`LATENESS`. Each range is processed sequentially, in increasing timestamp order.\n\n# Example\n\nConsider a table with timestamp column of type `TIMESTAMP` and lateness attribute\n`INTERVAL 1 DAY`. 
Assuming that the oldest timestamp in the table is\n`2024-01-01T00:00:00``, the connector will fetch all records with timestamps\nfrom `2024-01-01`, then all records for `2024-01-02`, `2024-01-03`, etc., until all records\nin the table have been ingested.\n\n# Requirements\n\n* The timestamp column must be of a supported type: integer, `DATE`, or `TIMESTAMP`.\n* The timestamp column must be declared with non-zero `LATENESS`.\n* For efficient ingest, the table must be optimized for timestamp-based\nqueries using partitioning, Z-ordering, or liquid clustering.", + "nullable": true + } + }, + "additionalProperties": { + "type": "string", + "description": "Storage options for configuring backend object store.\n\nSee the [list of available options in PyIceberg documentation](https://py.iceberg.apache.org/configuration/#fileio)." + } + } + ], + "description": "Iceberg input connector configuration." + }, "InputEndpointConfig": { "allOf": [ { @@ -3827,6 +3933,74 @@ } } }, + "RestCatalogConfig": { + "type": "object", + "description": "Iceberg REST catalog config.", + "properties": { + "rest.audience": { + "type": "string", + "description": "Logical name of target resource or service.", + "nullable": true + }, + "rest.credential": { + "type": "string", + "description": "Credential to use for OAuth2 credential flow when initializing the catalog.\n\nA key and secret pair separated by \":\" (key is optional).", + "nullable": true + }, + "rest.headers": { + "type": "array", + "items": { + "type": "array", + "items": { + "allOf": [ + { + "type": "string" + }, + { + "type": "string" + } + ] + } + }, + "description": "Additional HTTP request headers added to each catalog REST API call.", + "nullable": true + }, + "rest.oauth2-server-uri": { + "type": "string", + "description": "Authentication URL to use for client credentials authentication (default: uri + 'v1/oauth/tokens')", + "nullable": true + }, + "rest.prefix": { + "type": "string", + "description": "Customize table storage 
paths.\n\nWhen combined with the `warehouse` property, the prefix determines\nhow table data is organized within the storage.", + "nullable": true + }, + "rest.resource": { + "type": "string", + "description": "URI for the target resource or service.", + "nullable": true + }, + "rest.scope": { + "type": "string", + "nullable": true + }, + "rest.token": { + "type": "string", + "description": "Bearer token value to use for `Authorization` header.", + "nullable": true + }, + "rest.uri": { + "type": "string", + "description": "URI identifying the REST catalog server.", + "nullable": true + }, + "rest.warehouse": { + "type": "string", + "description": "The default location for managed tables created by the catalog.", + "nullable": true + } + } + }, "RngFieldSettings": { "type": "object", "description": "Configuration for generating random data for a field of a table.", @@ -4460,6 +4634,24 @@ } } }, + { + "type": "object", + "required": [ + "name", + "config" + ], + "properties": { + "config": { + "$ref": "#/components/schemas/IcebergReaderConfig" + }, + "name": { + "type": "string", + "enum": [ + "iceberg_input" + ] + } + } + }, { "type": "object", "required": [